aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKirill Yukhin <kirill.yukhin@intel.com>2014-07-19 09:44:34 +0000
committerKirill Yukhin <kirill.yukhin@intel.com>2014-07-19 09:44:34 +0000
commite8adc0d2aa94cd3fabb32687214e9aa3c6df84cd (patch)
treee52ab4bdaab79d0b02ed31b25f44ff3cfba957b9
parent614ec7014faca3ecc66e9f3f8cf7d0e37860d720 (diff)
Initial support for AVX-512{VL,BW,DQ}avx512-vlbwdq
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/avx512-vlbwdq@212839 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/common/config/i386/i386-common.c52
-rw-r--r--gcc/config.gcc6
-rw-r--r--gcc/config/i386/avx512bwintrin.h2658
-rw-r--r--gcc/config/i386/avx512dqintrin.h2308
-rw-r--r--gcc/config/i386/avx512vlbwintrin.h4218
-rw-r--r--gcc/config/i386/avx512vldqintrin.h2035
-rw-r--r--gcc/config/i386/avx512vlintrin.h13213
-rw-r--r--gcc/config/i386/cpuid.h3
-rw-r--r--gcc/config/i386/driver-i386.c10
-rw-r--r--gcc/config/i386/i386-builtin-types.def453
-rw-r--r--gcc/config/i386/i386-c.c6
-rw-r--r--gcc/config/i386/i386-modes.def3
-rw-r--r--gcc/config/i386/i386.c3066
-rw-r--r--gcc/config/i386/i386.h19
-rw-r--r--gcc/config/i386/i386.md287
-rw-r--r--gcc/config/i386/i386.opt12
-rw-r--r--gcc/config/i386/immintrin.h10
-rw-r--r--gcc/config/i386/sse.md4953
-rw-r--r--gcc/config/i386/subst.md51
-rw-r--r--gcc/testsuite/g++.dg/other/i386-2.C2
-rw-r--r--gcc/testsuite/g++.dg/other/i386-3.C2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx-1.c204
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-check.h47
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vdbpsadbw-1.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vdbpsadbw-2.c80
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpabsb-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpabsb-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpabsw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpabsw-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpackssdw-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpackssdw-2.c77
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpacksswb-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpacksswb-2.c77
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpackusdw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpackusdw-2.c77
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpackuswb-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpackuswb-2.c77
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddb-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddb-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddsb-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddsb-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddsw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddsw-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddusb-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddusb-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddusw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddusw-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpaddw-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-2.c74
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpavgb-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpavgb-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpavgw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpavgw-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpblendmb-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpblendmb-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpblendmw-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpblendmw-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastb-1.c39
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastb-2.c76
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastw-1.c39
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastw-2.c76
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-2.c107
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqb-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqb-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqw-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqw-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtb-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtb-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtw-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtw-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-2.c107
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpuw-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpuw-2.c91
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpw-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpcmpw-2.c91
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpermi2w-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpermi2w-2.c58
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c37
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-2.c70
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpermw-1.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpermw-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaddubsw-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaddubsw-2.c66
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-2.c58
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsb-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsb-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsw-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsw-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaxub-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaxub-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaxuw-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmaxuw-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpminsb-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpminsb-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpminsw-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpminsw-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpminub-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpminub-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpminuw-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpminuw-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovb2m-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovb2m-2.c43
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2b-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2b-2.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2w-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2w-2.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovsxbw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovsxbw-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovw2m-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovw2m-2.c43
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovzxbw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovzxbw-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmulhrsw-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmulhrsw-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmulhuw-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmulhuw-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmulhw-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmulhw-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmullw-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmullw-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpshufb-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpshufb-2.c58
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpshufhw-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpshufhw-2.c62
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpshuflw-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpshuflw-2.c62
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpslldq-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsllvw-1.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsllvw-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsllw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsllw-2.c64
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsllwi-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsllwi-2.c74
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsravw-1.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsravw-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-2.c61
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsrawi-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsrawi-2.c58
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsrldq-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsrlvw-1.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsrlvw-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsrlw-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsrlw-2.c82
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsrlwi-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsrlwi-2.c72
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubb-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubb-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubsb-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubsb-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubsw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubsw-2.c56
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubusb-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubusb-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubusw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubusw-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubw-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpsubw-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vptestmb-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vptestmb-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vptestmw-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vptestmw-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vptestnmb-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vptestnmb-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vptestnmw-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vptestnmw-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhbw-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhbw-2.c68
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhwd-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhwd-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklbw-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklbw-2.c68
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklwd-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklwd-2.c60
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-check.h47
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vandnpd-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vandnpd-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vandnps-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vandnps-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vandpd-1.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vandpd-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vandps-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vandps-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x2-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x2-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x8-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x8-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf64x2-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf64x2-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x2-1.c34
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x2-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x8-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x8-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti64x2-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti64x2-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2qq-1.c44
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2qq-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2uqq-1.c44
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2uqq-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2qq-1.c40
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2qq-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2uqq-1.c40
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2uqq-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2pd-1.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2pd-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2ps-1.c40
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2ps-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2qq-1.c44
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2qq-2.c53
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2uqq-1.c44
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2uqq-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2qq-1.c40
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2qq-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2uqq-1.c40
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2uqq-2.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2pd-1.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2pd-2.c50
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2ps-1.c40
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2ps-2.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vextractf32x8-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vextractf64x2-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vextractf64x2-2.c54
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vextracti32x8-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vextracti64x2-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vextracti64x2-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vfpclasspd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vfpclasspd-2.c74
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vfpclassps-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vfpclassps-2.c75
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vinsertf32x8-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vinsertf64x2-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vinsertf64x2-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vinserti32x8-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vinserti64x2-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vinserti64x2-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vorpd-1.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vorpd-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vorps-1.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vorps-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vpmovd2m-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vpmovd2m-2.c43
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2d-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2d-2.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2q-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2q-2.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vpmovq2m-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vpmovq2m-2.c43
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-2.c49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vrangepd-1.c40
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vrangepd-2.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vrangeps-1.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vrangeps-2.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vrangesd-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vrangess-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vreducepd-1.c36
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vreducepd-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vreduceps-1.c37
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vreduceps-2.c55
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vxorpd-1.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vxorpd-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vxorps-1.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512dq-vxorps-2.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-helper.h47
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-mask-type.h20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c32
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-2.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-2.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-2.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandd-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandnd-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandnq-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpandq-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-2.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c35
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c33
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c8
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c8
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-2.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-2.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-2.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-2.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-2.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-2.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-2.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpord-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vporq-2.c5
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpxord-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vpxorq-2.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-2.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-check.h49
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-gather-1.c217
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i32gatherd-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i32gatherpd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i32gatherps-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i32gatherq-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i32scatterd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i32scatterpd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i32scatterps-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i32scatterq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i64gatherd-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i64gatherpd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i64gatherps-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i64gatherq-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i64scatterd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i64scatterpd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i64scatterps-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-i64scatterq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vaddpd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vaddpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vaddps-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vaddps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-valignd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-valignd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-valignq-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-valignq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vandnpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vandnps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vandpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vandps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vblendmpd-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vblendmpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vblendmps-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vblendmps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x2-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x4-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x4-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf64x2-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x2-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x4-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x4-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti64x2-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastsd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastsd-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastss-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastss-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcmppd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcmppd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcmpps-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcmpps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcompresspd-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcompresspd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcompressps-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcompressps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2pd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2pd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2ps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2ps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2dq-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2dq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2ps-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2ps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2qq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2udq-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2udq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2uqq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtph2ps-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtph2ps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2dq-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2dq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2pd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2pd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2qq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2udq-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2udq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2uqq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtqq2pd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtqq2ps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2dq-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2dq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2qq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2udq-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2udq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2uqq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2dq-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2dq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2qq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2udq-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2udq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2uqq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2pd-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2pd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2ps-1.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2ps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtuqq2pd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vcvtuqq2ps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vdbpsadbw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vdivpd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vdivpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vdivps-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vdivps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vexpandpd-1.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vexpandpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vexpandps-1.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vexpandps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vextractf32x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vextractf32x4-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vextractf64x2-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vextracti32x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vextracti32x4-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vextracti64x2-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmpd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmps-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXpd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXps-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXpd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXps-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXpd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXps-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXpd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXps-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXpd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXps-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXpd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXps-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfpclasspd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vfpclassps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vgetexppd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vgetexppd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vgetexpps-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vgetexpps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vgetmantpd-1.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vgetmantpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vgetmantps-1.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vgetmantps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vinsertf32x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vinsertf32x4-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vinsertf64x2-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vinserti32x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vinserti32x4-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vinserti64x2-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmaxpd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmaxpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmaxps-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmaxps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vminpd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vminpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vminps-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vminps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c38
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c38
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovddup-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovddup-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c38
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu16-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu32-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu32-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu64-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu64-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu8-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovntdqa-1.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovshdup-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovshdup-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovsldup-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovsldup-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovupd-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovupd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovups-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovups-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmulpd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmulpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmulps-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmulps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vorpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vorps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpabsb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpabsd-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpabsd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpabsq-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpabsq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpabsw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpackssdw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpacksswb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpackusdw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpackuswb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpaddb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpaddd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpaddd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpaddq-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpaddq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpaddsb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpaddsw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpaddusb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpaddusw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpaddw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpalignr-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpandd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpandd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpandnd-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpandnd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpandnq-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpandnq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpandq-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpandq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpavgb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpavgw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpblendmb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpblendmd-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpblendmd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpblendmq-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpblendmq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpblendmw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastd-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastmb2q-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastmw2d-1.c17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastq-1.c31
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpged-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeud-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeuq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpled-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleud-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleuq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltud-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltuq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpub-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpud-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpud-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcmpw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcompressd-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcompressd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcompressq-1.c28
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpcompressq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpconflictd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpconflictq-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermd-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermi2d-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermi2d-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermi2pd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermi2pd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermi2ps-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermi2ps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermi2q-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermi2q-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermi2w-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermilpd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermilpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermilpdi-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermilpdi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermilps-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermilps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermilpsi-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermilpsi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermpd-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermpd-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermpdi-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermpdi-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermps-1.c19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermps-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermt2w-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpermw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpexpandd-1.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpexpandd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpexpandq-1.c29
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpexpandq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vplzcntd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vplzcntq-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaddubsw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaddwd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxub-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxud-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxud-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminsb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminsd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminsd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminsq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminsq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminsw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminub-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminud-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminud-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminuq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminuq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpminuw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovb2m-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovd2m-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovdb-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovdb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovdw-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovdw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2b-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2d-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2q-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2w-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovq2m-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovqb-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovqb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovqd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovqd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovqw-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovqw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdb-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdw-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqb-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqw-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovswb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbq-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxdq-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxdq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwq-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdb-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdw-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqb-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqw-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovuswb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovw2m-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovwb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbq-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxdq-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxdq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwq-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmuldq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmuldq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmulhrsw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmulhuw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmulhw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmulld-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmulld-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmullq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmullw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmuludq-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpmuludq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpord-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpord-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vporq-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vporq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprold-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprold-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprolq-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprolq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprolvd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprolvd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprolvq-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprolvq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprord-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprord-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprorq-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprorq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprorvd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprorvd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprorvq-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vprorvq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsadbw-1.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpshufb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpshufd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpshufd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpshufhw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpshuflw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpslld-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpslld-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpslldi-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpslldi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllq-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllqi-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllqi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllvd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllvd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllvq-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllvq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllvw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsllwi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsradi-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsradi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsraq-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsraq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsraqi-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsraqi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsravd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsravd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsravq-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsravq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsravw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsraw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrawi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrld-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrld-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrldi-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrldi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlq-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlqi-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlqi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvq-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsrlwi-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsubb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsubd-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsubd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsubq-1.c23
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsubq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsubsb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsubsw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsubusb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsubusw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpsubw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestmb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestmd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestmd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestmq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestmq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestmw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestnmb-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestnmd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestnmd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestnmq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestnmq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vptestnmw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhbw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhdq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhdq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhqdq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhqdq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhwd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklbw-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpckldq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpckldq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklqdq-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklqdq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklwd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpxord-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpxord-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpxorq-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vpxorq-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrangepd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrangeps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrcp14pd-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrcp14pd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrcp14ps-1.c27
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrcp14ps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vreducepd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vreduceps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrndscalepd-1.c36
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrndscalepd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrndscaleps-1.c36
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrndscaleps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14pd-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14pd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14ps-1.c26
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14ps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vscalefpd-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vscalefpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vscalefps-1.c25
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vscalefps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-1.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshufpd-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshufpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshufps-1.c21
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vshufps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vsqrtpd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vsqrtpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vsqrtps-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vsqrtps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vsubpd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vsubpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vsubps-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vsubps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vunpckhpd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vunpckhpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vunpckhps-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vunpckhps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vunpcklpd-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vunpcklpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vunpcklps-1.c24
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vunpcklps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vxorpd-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vxorps-2.c14
-rw-r--r--gcc/testsuite/gcc.target/i386/i386.exp47
-rw-r--r--gcc/testsuite/gcc.target/i386/m512-check.h4
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-12.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-13.c206
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-14.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-22.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-23.c206
-rw-r--r--gcc/tree-core.h2
930 files changed, 54883 insertions, 1746 deletions
diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index 3012783d24a..da47e644b43 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -65,6 +65,12 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_AVX512PF | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512ER_SET \
(OPTION_MASK_ISA_AVX512ER | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512DQ_SET \
+ (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512BW_SET \
+ (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512VL_SET \
+ (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
@@ -152,10 +158,15 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
#define OPTION_MASK_ISA_AVX512F_UNSET \
(OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
- | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET)
+ | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
+ | OPTION_MASK_ISA_AVX512DQ_UNSET | OPTION_MASK_ISA_AVX512BW_UNSET \
+ | OPTION_MASK_ISA_AVX512VL_UNSET)
#define OPTION_MASK_ISA_AVX512CD_UNSET OPTION_MASK_ISA_AVX512CD
#define OPTION_MASK_ISA_AVX512PF_UNSET OPTION_MASK_ISA_AVX512PF
#define OPTION_MASK_ISA_AVX512ER_UNSET OPTION_MASK_ISA_AVX512ER
+#define OPTION_MASK_ISA_AVX512DQ_UNSET OPTION_MASK_ISA_AVX512DQ
+#define OPTION_MASK_ISA_AVX512BW_UNSET OPTION_MASK_ISA_AVX512BW
+#define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL
#define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
@@ -393,6 +404,45 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
+ case OPT_mavx512dq:
+ if (value)
+ {
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512DQ_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512DQ_UNSET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512DQ_UNSET;
+ }
+ return true;
+
+ case OPT_mavx512bw:
+ if (value)
+ {
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512BW_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512BW_UNSET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512BW_UNSET;
+ }
+ return true;
+
+ case OPT_mavx512vl:
+ if (value)
+ {
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512VL_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512VL_UNSET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512VL_UNSET;
+ }
+ return true;
+
case OPT_mfma:
if (value)
{
diff --git a/gcc/config.gcc b/gcc/config.gcc
index aa2d1a97897..5e0b09b0afa 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -364,7 +364,8 @@ i[34567]86-*-*)
adxintrin.h fxsrintrin.h xsaveintrin.h xsaveoptintrin.h
avx512cdintrin.h avx512erintrin.h avx512pfintrin.h
shaintrin.h clflushoptintrin.h xsavecintrin.h
- xsavesintrin.h"
+ xsavesintrin.h avx512dqintrin.h avx512bwintrin.h
+ avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h"
;;
x86_64-*-*)
cpu_type=i386
@@ -382,7 +383,8 @@ x86_64-*-*)
adxintrin.h fxsrintrin.h xsaveintrin.h xsaveoptintrin.h
avx512cdintrin.h avx512erintrin.h avx512pfintrin.h
shaintrin.h clflushoptintrin.h xsavecintrin.h
- xsavesintrin.h"
+ xsavesintrin.h avx512dqintrin.h avx512bwintrin.h
+ avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
new file mode 100644
index 00000000000..b5caab956b5
--- /dev/null
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -0,0 +1,2658 @@
+/* Copyright (C) 2014
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BWINTRIN_H_INCLUDED
+#define _AVX512BWINTRIN_H_INCLUDED
+
+#ifndef __AVX512BW__
+#pragma GCC push_options
+#pragma GCC target("avx512bw")
+#define __DISABLE_AVX512BW__
+#endif /* __AVX512BW__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+typedef char __v64qi __attribute__ ((__vector_size__ (64)));
+
+typedef unsigned long long __mmask64;
+
+/* Return a 512-bit vector with all 64 byte elements set to zero.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_qi (void)
+{
+  /* A vector initializer with fewer initializers than lanes
+     zero-fills the remaining elements, so a single 0 suffices.  */
+  return __extension__ (__m512i)(__v64qi){ 0 };
+}
+
+/* Return a 512-bit vector with all 32 word elements set to zero.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_hi (void)
+{
+  /* A vector initializer with fewer initializers than lanes
+     zero-fills the remaining elements, so a single 0 suffices.  */
+  return __extension__ (__m512i)(__v32hi){ 0 };
+}
+
+/* Merge-masking word move (VMOVDQU16): copy 16-bit elements of __A
+   whose mask bit in __U is set; other elements come from __W.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
+{
+  return (__m512i) __builtin_ia32_movdquhi512_mask ((__v32hi) __A,
+						    (__v32hi) __W,
+						    (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_movdquhi512_mask ((__v32hi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
+{
+ __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
+ (__v32hi) __A,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_movdquqi512_mask ((__v64qi) __A,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+/* Zero-masking byte move (VMOVDQU8): copy 8-bit elements of __A whose
+   mask bit in __U is set; all other byte lanes are zeroed.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
+{
+  /* Use the byte-element zero helper for this byte operation, matching
+     the other *_qi intrinsics (was the word variant _mm512_setzero_hi;
+     same value, but inconsistent with the element type).  */
+  return (__m512i) __builtin_ia32_movdquqi512_mask ((__v64qi) __A,
+						    (__v64qi)
+						    _mm512_setzero_qi (),
+						    (__mmask64) __U);
+}
+
+/* KUNPCKWD: concatenate the low 16 bits of __B (low half of the
+   result) with the low 16 bits of __A (high half).  */
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
+					      (__mmask32) __B);
+}
+
+/* KUNPCKDQ: concatenate the low 32 bits of __B (low half of the
+   result) with the low 32 bits of __A (high half).  */
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
+					      (__mmask64) __B);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+/* Zero-masking unaligned byte load (VMOVDQU8): load 8-bit elements
+   from __P where the mask bit in __U is set; zero the rest.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
+{
+  /* Use the byte-element zero helper for this byte operation, matching
+     the other *_qi intrinsics (was the word variant _mm512_setzero_hi;
+     same value, but inconsistent with the element type).  */
+  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+						     (__v64qi)
+						     _mm512_setzero_qi (),
+						     (__mmask64) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
+{
+ __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
+ (__v64qi) __A,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sad_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
+ (__v64qi) __B);
+}
+
+/* Truncate each 16-bit element of __A to 8 bits (VPMOVWB).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi16_epi8 (__m512i __A)
+{
+  /* The mask is all-ones so the pass-through operand is never used;
+     pass a zeroed vector instead of reading an uninitialized local,
+     which is undefined behavior and trips -Wuninitialized.  */
+  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+						  (__v32qi)
+						  _mm256_setzero_si256 (),
+						  (__mmask32) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi) __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+/* Convert each 16-bit element of __A to 8 bits with signed
+   saturation (VPMOVSWB).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi16_epi8 (__m512i __A)
+{
+  /* The mask is all-ones so the pass-through operand is never used;
+     pass a zeroed vector instead of reading an uninitialized local,
+     which is undefined behavior and trips -Wuninitialized.  */
+  return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
+						   (__v32qi)
+						   _mm256_setzero_si256 (),
+						   (__mmask32) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
+ (__v32qi) __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+/* Convert each 16-bit element of __A to 8 bits with unsigned
+   saturation (VPMOVUSWB).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi16_epi8 (__m512i __A)
+{
+  /* The mask is all-ones so the pass-through operand is never used;
+     pass a zeroed vector instead of reading an uninitialized local,
+     which is undefined behavior and trips -Wuninitialized.  */
+  return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
+						    (__v32qi)
+						    _mm256_setzero_si256 (),
+						    (__mmask32) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
+ (__v32qi) __O,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+/* Broadcast the low byte of __A to all 64 byte lanes (VPBROADCASTB).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastb_epi8 (__m128i __A)
+{
+  /* Full mask: the merge source is never read, so hand the builtin a
+     zeroed vector instead of an uninitialized local (undefined
+     behavior, -Wuninitialized).  */
+  return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
+						       (__v64qi)
+						       _mm512_setzero_si512 (),
+						       (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
+ (__v64qi) __O,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+/* Merge-masking GPR broadcast: set byte lanes selected by __M to __A;
+   other lanes come from __O.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
+{
+  return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
+							   (__v64qi) __O,
+							   __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+/* Broadcast the low word of __A to all 32 word lanes (VPBROADCASTW).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastw_epi16 (__m128i __A)
+{
+  /* Full mask: the merge source is never read, so hand the builtin a
+     zeroed vector instead of an uninitialized local (undefined
+     behavior, -Wuninitialized).  */
+  return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
+						       (__v32hi)
+						       _mm512_setzero_si512 (),
+						       (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
+ (__v32hi) __O,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
+ (__v32hi) __O,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mulhrs_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mulhrs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mulhrs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mulhi_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mulhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mulhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mulhi_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mulhi_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mullo_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi8_epi16 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi8_epi16 (__m512i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi8_epi16 (__mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu8_epi16 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu8_epi16 (__m512i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu8_epi16 (__mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_epi16 (__m512i __A, __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I
+ /* idx */ ,
+ (__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -
+ 1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex2var_epi16 (__m512i __A, __mmask32 __U,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I
+ /* idx */ ,
+ (__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask2_permutex2var_epi16 (__m512i __A, __m512i __I,
+ __mmask32 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
+ (__v32hi) __I
+ /* idx */ ,
+ (__v32hi) __B,
+ (__mmask32)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex2var_epi16 (__mmask32 __U, __m512i __A,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varhi512_maskz ((__v32hi) __I
+ /* idx */ ,
+ (__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_avg_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_avg_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_subs_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_subs_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_adds_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_adds_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_subs_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_subs_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_adds_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_adds_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srl_epi16 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srl_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srl_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_packs_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sll_epi16 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sll_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sll_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+/* vpmaddubsw: multiply unsigned bytes of __X by corresponding signed
+   bytes of __Y and add horizontal pairs into 16-bit results (input
+   vectors are __v64qi, output is __v32hi).  Unmasked: -1 mask.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maddubs_epi16 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_maddubs_epi16 (__m512i __W, __mmask32 __U, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_maddubs_epi16 (__mmask32 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+/* vpmaddwd: multiply corresponding 16-bit elements of __A and __B and
+   add adjacent pairs into 32-bit results — note the destination is
+   __v16si with a __mmask16, unlike the 16-bit-element intrinsics.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_madd_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_madd_epi16 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_madd_epi16 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+/* vpunpckhbw: interleave the high-half bytes of __A and __B within
+   each 128-bit lane.  Unmasked form: all-ones __mmask64.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_pcmpeqb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_pcmpeqb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_epi16_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpeqw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpeqw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ __U);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpgt_epi8_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_pcmpgtb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpgt_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_pcmpgtb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpgt_epi16_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpgtw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpgt_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpgtw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ __U);
+}
+
+/* vpmovb2m: condense the 64 byte lanes of __A into a 64-bit mask,
+   one bit per byte (per the SDM, each byte's most significant bit
+   supplies the mask bit).  */
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movepi8_mask (__m512i __A)
+{
+ return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movepi16_mask (__m512i __A)
+{
+ return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
+}
+
+/* vpmovm2b: the inverse of _mm512_movepi8_mask — expand each bit of
+   the 64-bit mask __A into a full byte lane of the result.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movm_epi8 (__mmask64 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movm_epi16 (__mmask32 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
+}
+
+/* vptestmb: per-byte bitwise test — the result mask bit is set where
+   (__A & __B) is non-zero in that byte lane.  Unmasked: -1.  */
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_test_epi8_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
+ (__v64qi) __B, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_test_epi16_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
+ (__v32hi) __B, __U);
+}
+
+/* vptestnmb: the negated form of vptestmb — the result mask bit is
+   set where (__A & __B) is zero in that byte lane.  Unmasked: -1.  */
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
+ (__v64qi) __B, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
+ (__v32hi) __B, __U);
+}
+
+/* vpshufb: shuffle the bytes of __A within each 128-bit lane using
+   the low bits of each control byte in __B.  Unmasked: -1.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sra_epi16 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sra_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sra_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+/* vpsravw: arithmetic right shift with a per-lane variable count —
+   each 16-bit element of __A is shifted by the corresponding element
+   of __B (contrast _mm512_sra_epi16, which takes one scalar count in
+   an __m128i).  Unmasked: -1.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srav_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srav_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srav_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srlv_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srlv_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srlv_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sllv_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sllv_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_packs_epi16 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_packs_epi16 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_packus_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_packus_epi16 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_packus_epi16 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_epi8 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_epi16 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+#ifdef __OPTIMIZE__
+/* vpalignr: concatenate __A:__B per 128-bit lane and extract a
+   byte-shifted result.  The byte count __N is scaled to a bit count
+   (__N * 8) because the underlying builtin works on __v8di elements;
+   requires __OPTIMIZE__ so __N folds to a constant.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_alignr_epi8 (__m512i __A, __m512i __B, const int __N)
+{
+ return (__m512i) __builtin_ia32_palignr512 ((__v8di) __A,
+ (__v8di) __B, __N * 8);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_alignr_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B, const int __N)
+{
+ return (__m512i) __builtin_ia32_palignr512_mask ((__v8di) __A,
+ (__v8di) __B,
+ __N * 8,
+ (__v8di) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_alignr_epi8 (__mmask64 __U, __m512i __A, __m512i __B,
+ const int __N)
+{
+ return (__m512i) __builtin_ia32_palignr512_mask ((__v8di) __A,
+ (__v8di) __B,
+ __N * 8,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
+/* vdbpsadbw: double-block packed sum of absolute differences on
+   unsigned bytes; __imm is the instruction's immediate selecting the
+   four-byte block shuffle of __B (see the VDBPSADBW description in
+   the SDM).  Results are 16-bit sums (__v32hi), unmasked here.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dbsad_epu8 (__m512i __A, __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dbsad_epu8 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ __imm,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dbsad_epu8 (__mmask32 __U, __m512i __A, __m512i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srli_epi16 (__m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srli_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srli_epi16 (__mmask32 __U, __m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_slli_epi16 (__m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_slli_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ const int __B)
+{
+ return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_slli_epi16 (__mmask32 __U, __m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shufflehi_epi16 (__m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shufflehi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shufflehi_epi16 (__mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+/* Shuffle 16-bit elements in the low 64 bits of each 128-bit lane of
+   __A per immediate __imm.  Unmasked form: mask is all-ones, so the
+   zero merge source is never selected.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shufflelo_epi16 (__m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+/* Merge-masked form: elements whose bit in __U is clear are taken
+   from __W.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shufflelo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+/* Zero-masked form: elements whose bit in __U is clear become zero.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shufflelo_epi16 (__mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+/* Arithmetic right shift of 32 16-bit elements by immediate __imm;
+   unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srai_epi16 (__m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+/* Merge-masked arithmetic right shift; inactive lanes come from __W.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srai_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+/* Zero-masked arithmetic right shift; inactive lanes become zero.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srai_epi16 (__mmask32 __U, __m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+/* Per-element select of 16-bit lanes: bit set in __U picks __W,
+   clear picks __A.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
+{
+ return (__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+/* Per-element select of 8-bit lanes under the 64-bit mask __U.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
+{
+ return (__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) __A,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+/* Signed 16-bit compare with predicate immediate __P; result bits are
+   additionally ANDed with __U by the masked builtin.  */
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epi16_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+ const int __P)
+{
+ return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
+ (__v32hi) __Y, __P,
+ (__mmask32) __U);
+}
+
+/* Unmasked signed 16-bit compare with predicate immediate __P.  */
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epi16_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
+ (__v32hi) __Y, __P,
+ (__mmask32) -1);
+}
+
+/* Signed 8-bit compare of 64 lanes with predicate immediate __P,
+   under mask __U.  The mask parameter must be __mmask64: a 64-lane
+   byte compare produces one bit per byte, and the previous
+   __mmask32 declaration silently truncated the upper 32 mask bits
+   (the macro form of this intrinsic already uses __mmask64).  */
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epi8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
+			   const int __P)
+{
+  return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
+						  (__v64qi) __Y, __P,
+						  (__mmask64) __U);
+}
+
+/* Unmasked signed 8-bit compare with predicate immediate __P;
+   yields one result bit per byte lane.  */
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epi8_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
+ (__v64qi) __Y, __P,
+ (__mmask64) -1);
+}
+
+/* Unsigned 16-bit compare with predicate immediate __P, under
+   mask __U.  */
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epu16_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+ const int __P)
+{
+ return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
+ (__v32hi) __Y, __P,
+ (__mmask32) __U);
+}
+
+/* Unmasked unsigned 16-bit compare with predicate immediate __P.  */
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epu16_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
+ (__v32hi) __Y, __P,
+ (__mmask32) -1);
+}
+
+/* Unsigned 8-bit compare of 64 lanes with predicate immediate __P,
+   under mask __U.  As with the signed variant, the mask parameter
+   must be __mmask64 -- the previous __mmask32 declaration dropped
+   the upper 32 mask bits for the top 32 byte lanes.  */
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epu8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
+			   const int __P)
+{
+  return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
+						   (__v64qi) __Y, __P,
+						   (__mmask64) __U);
+}
+
+/* Unmasked unsigned 8-bit compare with predicate immediate __P.  */
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epu8_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
+ (__v64qi) __Y, __P,
+ (__mmask64) -1);
+}
+
+/* Pack 32-bit ints from __A/__B to 16-bit with signed saturation;
+   unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_packs_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+/* Zero-masked signed-saturating pack.  NOTE(review): this variant
+   spells the zero source _mm512_setzero_si512 while the unmasked one
+   uses _mm512_setzero_hi -- same value, inconsistent spelling.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_packs_epi32 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+/* Merge-masked signed-saturating pack; inactive lanes from __W.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_packs_epi32 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi) __W,
+ __M);
+}
+
+/* Pack 32-bit ints to 16-bit with unsigned saturation; unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_packus_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+/* Zero-masked unsigned-saturating pack.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_packus_epi32 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+/* Merge-masked unsigned-saturating pack; inactive lanes from __W.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi) __W,
+ __M);
+}
+
+/* Byte shift left of each 128-bit lane by __N bytes (the builtin
+   takes a bit count, hence the * 8).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_bslli_epi128 (__m512i __A, const int __N)
+{
+ return (__m512i) __builtin_ia32_pslldq512 (__A, __N * 8);
+}
+
+/* Byte shift right of each 128-bit lane by __N bytes.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_bsrli_epi128 (__m512i __A, const int __N)
+{
+ return (__m512i) __builtin_ia32_psrldq512 (__A, __N * 8);
+}
+
+#else
+/* Concatenate pairs of 128-bit lanes of Y:X and shift right by N
+   bytes.  N is parenthesized before scaling so an expression
+   argument (e.g. 2 + 2) expands correctly.  */
+#define _mm512_alignr_epi8(X, Y, N) \
+  ((__m512i) __builtin_ia32_palignr512 ((__v8di)(__m512i)(X), \
+					(__v8di)(__m512i)(Y), \
+					(int)((N) * 8)))
+
+/* Merge-masked variant.  The merge source (4th builtin operand) is
+   W -- the old expansion wrongly passed X, so masked-off lanes were
+   taken from the first source instead of the writemask source.  */
+#define _mm512_mask_alignr_epi8(W, U, X, Y, N) \
+  ((__m512i) __builtin_ia32_palignr512_mask ((__v8di)(__m512i)(X), \
+					     (__v8di)(__m512i)(Y), (int)((N) * 8), \
+					     (__v8di)(__m512i)(W), (__mmask64)(U)))
+
+/* Zero-masked variant: masked-off lanes become zero.  */
+#define _mm512_maskz_alignr_epi8(U, X, Y, N) \
+  ((__m512i) __builtin_ia32_palignr512_mask ((__v8di)(__m512i)(X), \
+					     (__v8di)(__m512i)(Y), (int)((N) * 8), \
+					     (__v8di)(__m512i)_mm512_setzero_si512 (), \
+					     (__mmask64)(U)))
+
+/* Macro forms of the immediate-operand intrinsics above, used when
+   __OPTIMIZE__ is off and the inline forms cannot guarantee a
+   compile-time constant immediate.  Argument structure mirrors the
+   corresponding inline definitions.  */
+
+/* Double-block packed sum of absolute differences, unmasked.  */
+#define _mm512_dbsad_epu8(X, Y, C) \
+ ((__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)(__m512i) (X), \
+ (__v64qi)(__m512i) (Y), (int) (C), \
+ (__v32hi)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask32)-1))
+
+/* Merge-masked dbsad; inactive 16-bit lanes from W.  */
+#define _mm512_mask_dbsad_epu8(W, U, X, Y, C) \
+ ((__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)(__m512i) (X), \
+ (__v64qi)(__m512i) (Y), (int) (C), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U)))
+
+/* Zero-masked dbsad.  */
+#define _mm512_maskz_dbsad_epu8(U, X, Y, C) \
+ ((__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)(__m512i) (X), \
+ (__v64qi)(__m512i) (Y), (int) (C), \
+ (__v32hi)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask32)(U)))
+
+/* Logical right shift of 16-bit lanes by immediate B.  */
+#define _mm512_srli_epi16(A, B) \
+ ((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)-1))
+
+#define _mm512_mask_srli_epi16(W, U, A, B) \
+ ((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)(__m512i)(W), (__mmask32)(U)))
+
+#define _mm512_maskz_srli_epi16(U, A, B) \
+ ((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)(U)))
+
+/* Left shift of 16-bit lanes by immediate C.  */
+#define _mm512_slli_epi16(X, C) \
+ ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
+ (__v32hi)(__m512i)_mm512_setzero_hi(),\
+ (__mmask32)-1))
+
+#define _mm512_mask_slli_epi16(W, U, X, C) \
+ ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
+ (__v32hi)(__m512i)(W),\
+ (__mmask32)(U)))
+
+#define _mm512_maskz_slli_epi16(U, X, C) \
+ ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
+ (__v32hi)(__m512i)_mm512_setzero_hi(),\
+ (__mmask32)(U)))
+
+/* Shuffle of the high 64 bits of each 128-bit lane.  */
+#define _mm512_shufflehi_epi16(A, B) \
+ ((__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)_mm512_setzero_hi(), \
+ (__mmask32)-1))
+
+#define _mm512_mask_shufflehi_epi16(W, U, A, B) \
+ ((__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U)))
+
+#define _mm512_maskz_shufflehi_epi16(U, A, B) \
+ ((__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)_mm512_setzero_hi(), \
+ (__mmask32)(U)))
+
+/* Shuffle of the low 64 bits of each 128-bit lane.  */
+#define _mm512_shufflelo_epi16(A, B) \
+ ((__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)_mm512_setzero_hi(), \
+ (__mmask32)-1))
+
+#define _mm512_mask_shufflelo_epi16(W, U, A, B) \
+ ((__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U)))
+
+#define _mm512_maskz_shufflelo_epi16(U, A, B) \
+ ((__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)_mm512_setzero_hi(), \
+ (__mmask32)(U)))
+
+/* Arithmetic right shift of 16-bit lanes by immediate B.  */
+#define _mm512_srai_epi16(A, B) \
+ ((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)-1))
+
+#define _mm512_mask_srai_epi16(W, U, A, B) \
+ ((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)(__m512i)(W), (__mmask32)(U)))
+
+#define _mm512_maskz_srai_epi16(U, A, B) \
+ ((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)(U)))
+
+/* Per-lane blends under writemask.  */
+#define _mm512_mask_blend_epi16(__U, __A, __W) \
+ ((__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) (__A), \
+ (__v32hi) (__W), \
+ (__mmask32) (__U)))
+
+#define _mm512_mask_blend_epi8(__U, __A, __W) \
+ ((__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) (__A), \
+ (__v64qi) (__W), \
+ (__mmask64) (__U)))
+
+/* Compares with predicate immediate P; byte forms correctly use
+   __mmask64 here (cf. the inline forms).  */
+#define _mm512_cmp_epi16_mask(X, Y, P) \
+ ((__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi)(__m512i)(X), \
+ (__v32hi)(__m512i)(Y), (int)(P),\
+ (__mmask32)(-1)))
+
+#define _mm512_cmp_epi8_mask(X, Y, P) \
+ ((__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(P),\
+ (__mmask64)(-1)))
+
+#define _mm512_cmp_epu16_mask(X, Y, P) \
+ ((__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi)(__m512i)(X), \
+ (__v32hi)(__m512i)(Y), (int)(P),\
+ (__mmask32)(-1)))
+
+#define _mm512_cmp_epu8_mask(X, Y, P) \
+ ((__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(P),\
+ (__mmask64)(-1)))
+
+#define _mm512_mask_cmp_epi16_mask(M, X, Y, P) \
+ ((__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi)(__m512i)(X), \
+ (__v32hi)(__m512i)(Y), (int)(P),\
+ (__mmask32)(M)))
+
+#define _mm512_mask_cmp_epi8_mask(M, X, Y, P) \
+ ((__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(P),\
+ (__mmask64)(M)))
+
+#define _mm512_mask_cmp_epu16_mask(M, X, Y, P) \
+ ((__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi)(__m512i)(X), \
+ (__v32hi)(__m512i)(Y), (int)(P),\
+ (__mmask32)(M)))
+
+#define _mm512_mask_cmp_epu8_mask(M, X, Y, P) \
+ ((__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(P),\
+ (__mmask64)(M)))
+
+/* Byte shifts of each 128-bit lane; N is a byte count, the builtin
+   takes bits.  */
+#define _mm512_bslli_epi128(A, N) \
+ ((__m512i)__builtin_ia32_pslldq512 ((__m512i)(A), (int)(N) * 8))
+
+#define _mm512_bsrli_epi128(A, N) \
+ ((__m512i)__builtin_ia32_psrldq512 ((__m512i)(A), (int)(N) * 8))
+
+#endif
+
+#ifdef __DISABLE_AVX512BW__
+#undef __DISABLE_AVX512BW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BW__ */
+
+#endif /* _AVX512BWINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
new file mode 100644
index 00000000000..41e8345aacf
--- /dev/null
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -0,0 +1,2308 @@
+/* Copyright (C) 2013
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512DQINTRIN_H_INCLUDED
+#define _AVX512DQINTRIN_H_INCLUDED
+
+#ifndef __AVX512DQ__
+#pragma GCC push_options
+#pragma GCC target("avx512dq")
+#define __DISABLE_AVX512DQ__
+#endif /* __AVX512DQ__ */
+
+/* Broadcast the two doubles in __A to all four 128-bit lanes.
+   With an all-ones mask every destination element is written, so
+   pass a defined zero vector as the merge operand instead of an
+   uninitialized local -- reading an indeterminate object is
+   undefined behavior and triggers -Wuninitialized.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_f64x2 (__m128d __A)
+{
+  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
+							   (__v8df)
+							   _mm512_setzero_pd (),
+							   (__mmask8) -1);
+}
+
+/* Merge-masked 128-bit double broadcast; lanes whose bit in __M is
+   clear are taken from __O.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
+{
+ return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
+ __A,
+ (__v8df)
+ __O, __M);
+}
+
+/* Zero-masked 128-bit double broadcast; lanes whose bit in __M is
+   clear become zero.  Use the double-precision zero vector rather
+   than casting _mm512_setzero_ps, matching the destination element
+   type (identical value, cleaner typing).  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
+{
+  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
+							   (__v8df)
+							   _mm512_setzero_pd (),
+							   __M);
+}
+
+/* Broadcast the 128-bit integer pair in __A to all four lanes.
+   All-ones mask writes every element, so pass a defined zero merge
+   operand rather than reading an uninitialized local (UB).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_i64x2 (__m128i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
+							   (__v8di)
+							   _mm512_setzero_si512 (),
+							   (__mmask8) -1);
+}
+
+/* Merge-masked 128-bit integer broadcast; inactive lanes from __O.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
+ __A,
+ (__v8di)
+ __O, __M);
+}
+
+/* Zero-masked 128-bit integer broadcast; inactive lanes zeroed.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
+ __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+/* Broadcast the low two floats of __A across the destination.
+   All-ones mask writes every element, so pass a defined zero merge
+   operand rather than reading an uninitialized local (UB).  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_f32x2 (__m128 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+							  (__v16sf)
+							  _mm512_setzero_ps (),
+							  (__mmask16) -1);
+}
+
+/* Merge-masked float-pair broadcast; inactive lanes from __O.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+ (__v16sf)
+ __O, __M);
+}
+
+/* Zero-masked float-pair broadcast; inactive lanes zeroed.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __M);
+}
+
+/* Broadcast the low two 32-bit integers of __A across the
+   destination.  All-ones mask writes every element, so pass a
+   defined zero merge operand rather than reading an uninitialized
+   local (UB).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_i32x2 (__m128i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
+							   (__v16si)
+							   _mm512_setzero_si512 (),
+							   (__mmask16) -1);
+}
+
+/* Merge-masked 32-bit-pair broadcast; inactive lanes from __O.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
+ __A,
+ (__v16si)
+ __O, __M);
+}
+
+/* Zero-masked 32-bit-pair broadcast; inactive lanes zeroed.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
+ __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+/* Broadcast the eight floats of __A to both 256-bit halves.
+   All-ones mask writes every element, so pass a defined zero merge
+   operand rather than reading an uninitialized local (UB).  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_f32x8 (__m256 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+							  (__v16sf)
+							  _mm512_setzero_ps (),
+							  (__mmask16) -1);
+}
+
+/* Merge-masked 256-bit float broadcast; inactive lanes from __O.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+ (__v16sf)
+ __O, __M);
+}
+
+/* Zero-masked 256-bit float broadcast; inactive lanes zeroed.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
+{
+ return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ __M);
+}
+
+/* Broadcast the 256-bit integer vector __A to both halves.
+   All-ones mask writes every element, so pass a defined zero merge
+   operand rather than reading an uninitialized local (UB).  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_i32x8 (__m256i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
+							   (__v16si)
+							   _mm512_setzero_si512 (),
+							   (__mmask16) -1);
+}
+
+/* Merge-masked 256-bit integer broadcast; inactive lanes from __O.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
+ __A,
+ (__v16si)
+ __O, __M);
+}
+
+/* Zero-masked 256-bit integer broadcast; inactive lanes zeroed.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
+ __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+/* Low 64 bits of the 64x64-bit products of __A and __B; unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mullo_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+/* Merge-masked 64-bit multiply; inactive lanes from __W.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+/* Zero-masked 64-bit multiply; inactive lanes zeroed.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+/* Bitwise XOR of double vectors; unmasked.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_xor_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+/* Merge-masked XOR of double vectors.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+/* Zero-masked XOR of double vectors.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* Bitwise XOR of float vectors; unmasked.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_xor_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+/* Merge-masked XOR of float vectors.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+/* Zero-masked XOR of float vectors.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+/* Bitwise OR of double vectors; unmasked.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_or_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+/* Merge-masked OR of double vectors.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+/* Zero-masked OR of double vectors.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* Bitwise OR of float vectors; unmasked.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_or_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+/* Merge-masked OR of float vectors.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+/* Zero-masked OR of float vectors.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+/* Bitwise AND of double vectors; unmasked.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+/* Merge-masked AND of double vectors.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+/* Zero-masked AND of double vectors.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* Bitwise AND of float vectors; unmasked.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+/* Merge-masked AND of float vectors.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+/* Zero-masked AND of float vectors.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+/* Bitwise AND-NOT (~__A & __B) of double vectors; unmasked.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+/* Merge-masked AND-NOT of double vectors.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+/* Zero-masked AND-NOT of double vectors.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* Bitwise AND-NOT of float vectors; unmasked.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+/* Merge-masked AND-NOT of float vectors.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B)
+{
+ return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+/* Zero-masked AND-NOT of float vectors.  */
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+/* Build a 16-bit mask from the sign bits of the 32-bit elements.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movepi32_mask (__m512i __A)
+{
+ return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
+}
+
+/* Build an 8-bit mask from the sign bits of the 64-bit elements.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movepi64_mask (__m512i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
+}
+
+/* Expand mask __A to a vector of all-ones/all-zeros 32-bit lanes.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movm_epi32 (__mmask16 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
+}
+
+/* Expand mask __A to a vector of all-ones/all-zeros 64-bit lanes.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movm_epi64 (__mmask8 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
+}
+
+/* Truncating conversions to 64-bit integers.  All use the current
+   rounding direction (_MM_FROUND_CUR_DIRECTION); truncation itself
+   is inherent in the cvtt builtins.  */
+
+/* double -> signed 64-bit, truncating; unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttpd_epi64 (__m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* double -> signed 64-bit, truncating; merge-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* double -> signed 64-bit, truncating; zero-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* double -> unsigned 64-bit, truncating; unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttpd_epu64 (__m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* double -> unsigned 64-bit, truncating; merge-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* double -> unsigned 64-bit, truncating; zero-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float (256-bit) -> signed 64-bit, truncating; unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttps_epi64 (__m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float -> signed 64-bit, truncating; merge-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float -> signed 64-bit, truncating; zero-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float -> unsigned 64-bit, truncating; unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttps_epu64 (__m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float -> unsigned 64-bit, truncating; merge-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float -> unsigned 64-bit, truncating; zero-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Rounding conversions to 64-bit integers, using the current
+   rounding direction.  */
+
+/* double -> signed 64-bit; unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_epi64 (__m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* double -> signed 64-bit; merge-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* double -> signed 64-bit; zero-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* double -> unsigned 64-bit; unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_epu64 (__m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* double -> unsigned 64-bit; merge-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* double -> unsigned 64-bit; zero-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float (256-bit) -> signed 64-bit; unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_epi64 (__m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float -> signed 64-bit; merge-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float -> signed 64-bit; zero-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float -> unsigned 64-bit; unmasked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_epu64 (__m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float -> unsigned 64-bit; merge-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* float -> unsigned 64-bit; zero-masked.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_ps (__m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu64_ps (__m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_pd (__m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu64_pd (__m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+/* Operations taking a compile-time immediate control byte __C / __imm.
+   These inline forms require __OPTIMIZE__ so the immediate is a
+   constant; the corresponding macro forms live in the #else branch
+   below.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_range_pd (__m512d __A, __m512d __B, int __C)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_range_pd (__m512d __W, __mmask8 __U,
+ __m512d __A, __m512d __B, int __C)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_range_ps (__m512 __A, __m512 __B, int __C)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_range_ps (__m512 __W, __mmask16 __U,
+ __m512 __A, __m512 __B, int __C)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* Low-element (scalar) forms: vreducesd/vreducess and
+   vrangesd/vrangess on the low double/float of __A and __B.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_sd (__m128d __A, __m128d __B, int __C)
+{
+ return (__m128d) __builtin_ia32_reducesd ((__v2df) __A,
+ (__v2df) __B, __C);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_ss (__m128 __A, __m128 __B, int __C)
+{
+ return (__m128) __builtin_ia32_reducess ((__v4sf) __A,
+ (__v4sf) __B, __C);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_sd (__m128d __A, __m128d __B, int __C)
+{
+ return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
+ (__v2df) __B, __C,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_ss (__m128 __A, __m128 __B, int __C)
+{
+ return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+/* _round variants take the rounding/SAE mode __R explicitly.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
+ (__v2df) __B, __C,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ __R);
+}
+
+/* fpclass: test the low element against the categories selected by
+   __imm; the result is delivered as a k-mask.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fpclass_ss_mask (__m128 __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclassss ((__v4sf) __A, __imm);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fpclass_sd_mask (__m128d __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
+}
+
+/* Explicit-rounding variants of the conversions above: the caller
+   supplies the rounding/SAE control __R instead of
+   _MM_FROUND_CUR_DIRECTION.  cvtt_* forms truncate; cvt_* forms round
+   according to __R.  Same plain/mask/maskz triples as before.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundps_epi64 (__m256 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundps_epu64 (__m256 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+/* Rounding (non-truncating) float -> 64-bit integer conversions.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_epi64 (__m512d __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_epu64 (__m512d __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_epi64 (__m256 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_epu64 (__m256 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+/* Explicit-rounding 64-bit integer -> float/double conversions.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi64_ps (__m512i __A, const int __R)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu64_ps (__m512i __A, const int __R)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi64_pd (__m512i __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu64_pd (__m512i __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+/* Packed vreducepd/vreduceps with immediate control __B, plus
+   extraction of 256-bit/128-bit sub-vectors selected by __imm.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_pd (__m512d __A, int __B)
+{
+ return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
+{
+ return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
+{
+ return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_ps (__m512 __A, int __B)
+{
+ return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
+{
+ return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
+{
+ return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+/* Extract the 256-bit half of __A selected by __imm (8 floats).  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extractf32x8_ps (__m512 __A, const int __imm)
+{
+ return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
+ __imm,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Extract the 128-bit lane of __A selected by __imm (2 doubles).  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extractf64x2_pd (__m512d __A, const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
+ __imm,
+ (__v2df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+/* Extract the 256-bit half of __A selected by __imm (8 int32s).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extracti32x8_epi32 (__m512i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
+ __imm,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+/* Extract the 128-bit integer lane of __A selected by __imm
+   (2 qwords).  AVX-512DQ.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
+{
+ /* Use the standard SSE2 zero vector rather than the non-standard
+    _mm_setzero_di helper (later removed from GCC); the pass-through
+    operand is ignored under the all-ones mask anyway.  */
+ return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
+ __imm,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -
+ 1);
+}
+
+/* Merge-masked form of _mm512_extracti64x2_epi64: lane elements whose
+   __U bit is clear are taken from __W.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
+ __imm,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+/* Zero-masked form of _mm512_extracti64x2_epi64: lane elements whose
+   __U bit is clear are zeroed.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ /* Use the standard SSE2 zero vector rather than the non-standard
+    _mm_setzero_di helper (later removed from GCC).  */
+ return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
+ __imm,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+/* Explicit-rounding (__R) variants of the packed range operations.  */
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_range_round_pd (__m512d __A, __m512d __B, int __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_range_round_pd (__m512d __W, __mmask8 __U,
+ __m512d __A, __m512d __B, int __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_range_round_ps (__m512 __W, __mmask16 __U,
+ __m512 __A, __m512 __B, int __C,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ __R);
+}
+
+/* Insert the 256-bit vector __B into __A at the half selected by
+   __imm.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm)
+{
+ return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A,
+ __m256 __B, const int __imm)
+{
+ return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B,
+ const int __imm)
+{
+ return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+/* Insert the 128-bit vector __B into __A at the lane selected by
+   __imm.  */
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v8di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A,
+ __m128d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v8df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B,
+ const int __imm)
+{
+ return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+/* fpclass: per-element classification tests controlled by __imm; the
+   result is a k-mask (optionally pre-masked by __U).  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A,
+ const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fpclass_pd_mask (__m512d __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
+ __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A,
+ const int __imm)
+{
+ return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fpclass_ps_mask (__m512 __A, const int __imm)
+{
+ return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
+ __imm,
+ (__mmask16) -
+ 1);
+}
+
+#else
+/* Non-__OPTIMIZE__ macro forms of the scalar vrangesd/vrangess
+   intrinsics; the _round variants take an explicit rounding mode R,
+   the plain forms use the current direction.  */
+#define _mm_range_sd(A, B, C) \
+ ((__m128d) __builtin_ia32_rangesd128_round ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_range_ss(A, B, C) \
+ ((__m128) __builtin_ia32_rangess128_round ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_range_round_sd(A, B, C, R) \
+ ((__m128d) __builtin_ia32_rangesd128_round ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (R)))
+
+#define _mm_range_round_ss(A, B, C, R) \
+ ((__m128) __builtin_ia32_rangess128_round ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (R)))
+
+/* Rounding-control conversions between 64-bit integers and
+   floating point.  B (or the last argument) is the rounding-mode
+   immediate; the _mask_/_maskz_ forms merge into W / zero.  */
+#define _mm512_cvtt_roundpd_epi64(A, B) \
+ ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)(W), (U), (B)))
+
+#define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \
+ ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+
+#define _mm512_cvtt_roundpd_epu64(A, B) \
+ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)(W), (U), (B)))
+
+#define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \
+ ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+
+#define _mm512_cvtt_roundps_epi64(A, B) \
+ ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)(W), (U), (B)))
+
+#define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \
+ ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+
+#define _mm512_cvtt_roundps_epu64(A, B) \
+ ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)(W), (U), (B)))
+
+#define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \
+ ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+
+/* Non-truncating (rounding) conversions.  */
+#define _mm512_cvt_roundpd_epi64(A, B) \
+ ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)(W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \
+ ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+
+#define _mm512_cvt_roundpd_epu64(A, B) \
+ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)(W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \
+ ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+
+#define _mm512_cvt_roundps_epi64(A, B) \
+ ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)(W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundps_epi64(U, A, B) \
+ ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+
+#define _mm512_cvt_roundps_epu64(A, B) \
+ ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+
+#define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \
+ ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)(W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundps_epu64(U, A, B) \
+ ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+
+/* 64-bit integer -> floating point; the ps forms narrow to __m256.  */
+#define _mm512_cvt_roundepi64_ps(A, B) \
+ ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), -1, (B)))
+
+#define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \
+ ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \
+ ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (U), (B)))
+
+#define _mm512_cvt_roundepu64_ps(A, B) \
+ ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), -1, (B)))
+
+#define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \
+ ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \
+ ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (U), (B)))
+
+#define _mm512_cvt_roundepi64_pd(A, B) \
+ ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), -1, (B)))
+
+#define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \
+ ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \
+ ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (U), (B)))
+
+#define _mm512_cvt_roundepu64_pd(A, B) \
+ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), -1, (B)))
+
+#define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \
+ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (W), (U), (B)))
+
+#define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \
+ ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (U), (B)))
+
+/* vreducepd/vreduceps: per-element fraction extraction controlled by
+   the immediate B.  */
+#define _mm512_reduce_pd(A, B) \
+ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
+ (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)-1))
+
+#define _mm512_mask_reduce_pd(W, U, A, B) \
+ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
+ (int)(B), (__v8df)(__m512d)(W), (__mmask8)(U)))
+
+#define _mm512_maskz_reduce_pd(U, A, B) \
+ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
+ (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)(U)))
+
+#define _mm512_reduce_ps(A, B) \
+ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
+ (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)-1))
+
+#define _mm512_mask_reduce_ps(W, U, A, B) \
+ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
+ (int)(B), (__v16sf)(__m512)(W), (__mmask16)(U)))
+
+#define _mm512_maskz_reduce_ps(U, A, B) \
+ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
+ (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U)))
+
+/* Lane extraction: pull the 256-bit (32x8) or 128-bit (64x2) lane
+   selected by the immediate C out of a 512-bit source.  */
+#define _mm512_extractf32x8_ps(X, C) \
+ ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
+ (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8)-1))
+
+#define _mm512_mask_extractf32x8_ps(W, U, X, C) \
+ ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
+ (int) (C), (__v8sf)(__m256) (W), (__mmask8) (U)))
+
+#define _mm512_maskz_extractf32x8_ps(U, X, C) \
+ ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
+ (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8) (U)))
+
+#define _mm512_extractf64x2_pd(X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))
+
+#define _mm512_mask_extractf64x2_pd(W, U, X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))
+
+#define _mm512_maskz_extractf64x2_pd(U, X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
+
+#define _mm512_extracti32x8_epi32(X, C) \
+ ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
+ (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8)-1))
+
+#define _mm512_mask_extracti32x8_epi32(W, U, X, C) \
+ ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
+ (int) (C), (__v8si)(__m256i) (W), (__mmask8) (U)))
+
+#define _mm512_maskz_extracti32x8_epi32(U, X, C) \
+ ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
+ (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8) (U)))
+
+#define _mm512_extracti64x2_epi64(X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))
+
+#define _mm512_mask_extracti64x2_epi64(W, U, X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))
+
+#define _mm512_maskz_extracti64x2_epi64(U, X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))
+
+/* vrangepd/vrangeps: per-element min/max-style range operation chosen
+   by the immediate C; the _round forms take an explicit rounding
+   mode R.  */
+#define _mm512_range_pd(A, B, C) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_range_pd(W, U, A, B, C) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_range_pd(U, A, B, C) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_range_ps(A, B, C) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_range_ps(W, U, A, B, C) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_range_ps(U, A, B, C) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_range_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (R)))
+
+#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), (R)))
+
+#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (R)))
+
+#define _mm512_range_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (R)))
+
+#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), (R)))
+
+#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (R)))
+
+/* Lane insertion: place the 128-bit (64x2) or 256-bit (32x8) operand
+   Y into the lane of X selected by the immediate C.  */
+#define _mm512_insertf64x2(X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (X), \
+ (__mmask8)-1))
+
+#define _mm512_mask_insertf64x2(W, U, X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (W), \
+ (__mmask8) (U)))
+
+#define _mm512_maskz_insertf64x2(U, X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), \
+ (__v8df)(__m512d) _mm512_setzero_pd(), (__mmask8) (U)))
+
+#define _mm512_inserti64x2(X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (X), (__mmask8)-1))
+
+#define _mm512_mask_inserti64x2(W, U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (W), \
+ (__mmask8) (U)))
+
+#define _mm512_maskz_inserti64x2(U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), \
+ (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8) (U)))
+
+#define _mm512_insertf32x8(X, Y, C) \
+ ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
+ (__v8sf)(__m256) (Y), (int) (C),\
+ (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__mmask16)-1))
+
+#define _mm512_mask_insertf32x8(W, U, X, Y, C) \
+ ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
+ (__v8sf)(__m256) (Y), (int) (C),\
+ (__v16sf)(__m512)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_insertf32x8(U, X, Y, C) \
+ ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
+ (__v8sf)(__m256) (Y), (int) (C),\
+ (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__mmask16)(U)))
+
+#define _mm512_inserti32x8(X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
+ (__v8si)(__m256i) (Y), (int) (C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)-1))
+
+#define _mm512_mask_inserti32x8(W, U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
+ (__v8si)(__m256i) (Y), (int) (C),\
+ (__v16si)(__m512i)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_inserti32x8(U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
+ (__v8si)(__m256i) (Y), (int) (C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+
+/* Scalar and masked fpclass macro forms.  Note: the trailing '\' that
+   previously ended the scalar macros made the following blank line
+   part of the expansion; removed.  */
+#define _mm_fpclass_ss_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclassss ((__v4sf) (__m128) (X), (int) (C)))
+
+#define _mm_fpclass_sd_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C)))
+
+#define _mm512_mask_fpclass_pd_mask(u, X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
+ (int) (C), (__mmask8)(u)))
+
+/* The ps form operates on sixteen elements, so the user mask must be
+   passed as __mmask16 -- casting to __mmask8 truncated bits 8-15 and
+   diverged from the inline _mm512_mask_fpclass_ps_mask.  */
+#define _mm512_mask_fpclass_ps_mask(u, x, c) \
+ ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
+ (int) (c),(__mmask16)(u)))
+
+#define _mm512_fpclass_pd_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
+ (int) (C), (__mmask8)-1))
+
+/* All-ones mask for sixteen ps elements must be (__mmask16)-1;
+   (__mmask8)-1 is 0x00ff and silently dropped elements 8-15.  */
+#define _mm512_fpclass_ps_mask(x, c) \
+ ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
+ (int) (c),(__mmask16)-1))
+
+/* Scalar vreduce macro forms.  Fixes: _mm_reduce_ss previously passed
+   (A) for both operands (copy-paste from the sd form); the second
+   operand must be (B).  Stray trailing '\' removed from both macros
+   so the following blank line is no longer part of the expansion.  */
+#define _mm_reduce_sd(A, B, C) \
+ ((__m128d) __builtin_ia32_reducesd ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C)))
+
+#define _mm_reduce_ss(A, B, C) \
+ ((__m128) __builtin_ia32_reducess ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C)))
+
+#endif
+
+#ifdef __DISABLE_AVX512DQ__
+#undef __DISABLE_AVX512DQ__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512DQ__ */
+
+#endif /* _AVX512DQINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
new file mode 100644
index 00000000000..c704550c7fd
--- /dev/null
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -0,0 +1,4218 @@
+/* Copyright (C) 2014
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VLBWINTRIN_H_INCLUDED
+#define _AVX512VLBWINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VL__) || !defined(__AVX512BW__)
+#pragma GCC push_options
+#pragma GCC target("avx512vl,avx512bw")
+#define __DISABLE_AVX512VLBW__
+#endif /* !__AVX512VL__ || !__AVX512BW__ */
+
+
+/* Masked byte moves (vmovdqu8): copy __A, keeping __W (mask form) or
+   zero (maskz form) in lanes whose mask bit is clear.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
+{
+  return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
+						    (__v32qi) __W,
+						    (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
+{
+  return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
+						    (__v32qi)
+						    _mm256_setzero_si256 (),
+						    (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
+						    (__v16qi) __W,
+						    (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
+						    (__v16qi)
+						    _mm_setzero_hi (),
+						    (__mmask16) __U);
+}
+
+/* Masked unaligned byte stores: only bytes whose __U bit is set are
+   written to *__P.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
+{
+  __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
+				     (__v32qi) __A,
+				     (__mmask32) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
+{
+  __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
+				     (__v16qi) __A,
+				     (__mmask16) __U);
+}
+
+/* Masked unaligned 16-bit loads: elements whose mask bit is clear
+   come from __W (mask form) or are zeroed (maskz form); masked-off
+   memory is not accessed.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
+{
+  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+						     (__v16hi) __W,
+						     (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
+{
+  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+						     (__v16hi)
+						     _mm256_setzero_si256 (),
+						     (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
+{
+  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+						     (__v8hi) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
+{
+  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+						     (__v8hi)
+						     _mm_setzero_hi (),
+						     (__mmask8) __U);
+}
+
+
+/* Masked 16-bit moves (vmovdqu16).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
+{
+  return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
+						    (__v16hi) __W,
+						    (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
+{
+  return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
+						    (__v16hi)
+						    _mm256_setzero_si256 (),
+						    (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
+						    (__v8hi) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
+						    (__v8hi)
+						    _mm_setzero_hi (),
+						    (__mmask8) __U);
+}
+
+/* Masked unaligned byte loads (vmovdqu8).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
+{
+  return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
+						     (__v32qi) __W,
+						     (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
+{
+  return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
+						     (__v32qi)
+						     _mm256_setzero_si256 (),
+						     (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
+{
+  return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+						     (__v16qi) __W,
+						     (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
+{
+  return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+						     (__v16qi)
+						     _mm_setzero_hi (),
+						     (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi16_epi8 (__m256i __A)
+{
+  /* Truncate each 16-bit element of __A to 8 bits.  The pass-through
+     operand is never observed because the mask is all-ones; use a
+     zeroed vector rather than an uninitialized local, which read
+     indeterminate memory (UB, -Wuninitialized).  */
+  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
+						  (__v16qi)
+						  _mm_setzero_si128 (),
+						  (__mmask16) -1);
+}
+
+/* Masked forms of the 16->8-bit truncation: masked-off result bytes
+   come from __O, or are zeroed in the maskz form.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
+						  (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
+						  (__v16qi)
+						  _mm_setzero_si128 (),
+						  __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi16_epi8 (__m128i __A)
+{
+  /* Signed-saturating 16->8-bit narrowing.  The all-ones mask means
+     the pass-through operand is unused; pass zeros instead of an
+     uninitialized local (which was UB).  */
+  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
+						   (__v16qi)
+						   _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+/* Masked signed-saturating 16->8-bit narrowing (128-bit source).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
+						   (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
+						   (__v16qi)
+						   _mm_setzero_si128 (),
+						   __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi16_epi8 (__m256i __A)
+{
+  /* Signed-saturating 16->8-bit narrowing of all sixteen elements.
+     Use a zeroed pass-through instead of an uninitialized local; the
+     all-ones mask makes it unobservable either way.  */
+  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
+						   (__v16qi)
+						   _mm_setzero_si128 (),
+						   (__mmask16) -1);
+}
+
+/* Masked signed-saturating 16->8-bit narrowing (256-bit source).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
+						   (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
+						   (__v16qi)
+						   _mm_setzero_si128 (),
+						   __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi16_epi8 (__m128i __A)
+{
+  /* Unsigned-saturating 16->8-bit narrowing.  Zeroed pass-through
+     replaces the previous uninitialized local (UB); the all-ones
+     mask makes the operand unobservable.  */
+  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
+						    (__v16qi)
+						    _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+/* Masked unsigned-saturating 16->8-bit narrowing (128-bit source).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
+						    (__v16qi) __O,
+						    __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
+						    (__v16qi)
+						    _mm_setzero_si128 (),
+						    __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi16_epi8 (__m256i __A)
+{
+  /* Unsigned-saturating 16->8-bit narrowing of all sixteen elements.
+     Zeroed pass-through replaces the previous uninitialized local.  */
+  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
+						    (__v16qi)
+						    _mm_setzero_si128 (),
+						    (__mmask16) -1);
+}
+
+/* Masked unsigned-saturating 16->8-bit narrowing (256-bit source).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
+						    (__v16qi) __O,
+						    __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
+						    (__v16qi)
+						    _mm_setzero_si128 (),
+						    __M);
+}
+
+/* Masked byte broadcasts: broadcastb replicates element 0 of a vector
+   source, set1 broadcasts a scalar from a general-purpose register.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
+						       (__v32qi) __O,
+						       __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
+						       (__v32qi)
+						       _mm256_setzero_si256 (),
+						       __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
+							   (__v32qi) __O,
+							   __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
+							   (__v32qi)
+							   _mm256_setzero_si256 (),
+							   __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
+						       (__v16qi) __O,
+						       __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
+						       (__v16qi)
+						       _mm_setzero_si128 (),
+						       __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
+							   (__v16qi) __O,
+							   __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
+							   (__v16qi)
+							   _mm_setzero_si128 (),
+							   __M);
+}
+
+/* Masked word broadcasts, parallel to the byte forms above.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
+						       (__v16hi) __O,
+						       __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
+						       (__v16hi)
+						       _mm256_setzero_si256 (),
+						       __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
+							   (__v16hi) __O,
+							   __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
+							   (__v16hi)
+							   _mm256_setzero_si256 (),
+							   __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
+						       (__v8hi) __O,
+						       __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
+						       (__v8hi)
+						       _mm_setzero_si128 (),
+						       __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
+							   (__v8hi) __O,
+							   __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
+							   (__v8hi)
+							   _mm_setzero_si128 (),
+							   __M);
+}
+
+/* vpermw: permute the 16-bit elements of __B using the indices in
+   __A.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __A,
+						     (__v16hi) __B,
+						     (__v16hi)
+						     _mm256_setzero_si256 (),
+						     (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
+				__m256i __B)
+{
+  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __A,
+						     (__v16hi) __B,
+						     (__v16hi)
+						     _mm256_setzero_si256 (),
+						     (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
+			       __m256i __B)
+{
+  return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __A,
+						     (__v16hi) __B,
+						     (__v16hi) __W,
+						     (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __A,
+						     (__v8hi) __B,
+						     (__v8hi)
+						     _mm_setzero_hi (),
+						     (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __A,
+						     (__v8hi) __B,
+						     (__v8hi)
+						     _mm_setzero_si128 (),
+						     (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
+			    __m128i __B)
+{
+  return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __A,
+						     (__v8hi) __B,
+						     (__v8hi) __W,
+						     (__mmask8) __M);
+}
+
+/* Two-source word permute (vpermt2w/vpermi2w): select 16-bit elements
+   from the pair {__A, __B} using the index vector __I.  The builtin
+   takes the index operand first; the mask form preserves __A in
+   masked-off lanes, the mask2 form preserves the index operand.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
+							/* idx */ ,
+							(__v16hi) __A,
+							(__v16hi) __B,
+							(__mmask16) -
+							1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U,
+				__m256i __I, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
+							/* idx */ ,
+							(__v16hi) __A,
+							(__v16hi) __B,
+							(__mmask16)
+							__U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I,
+				 __mmask16 __U, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
+							(__v16hi) __I
+							/* idx */ ,
+							(__v16hi) __B,
+							(__mmask16)
+							__U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
+				 __m256i __I, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I
+							 /* idx */ ,
+							 (__v16hi) __A,
+							 (__v16hi) __B,
+							 (__mmask16)
+							 __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
+							/* idx */ ,
+							(__v8hi) __A,
+							(__v8hi) __B,
+							(__mmask8) -
+							1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I,
+			     __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
+							/* idx */ ,
+							(__v8hi) __A,
+							(__v8hi) __B,
+							(__mmask8)
+							__U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U,
+			      __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
+							(__v8hi) __I
+							/* idx */ ,
+							(__v8hi) __B,
+							(__mmask8)
+							__U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
+			      __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I
+							 /* idx */ ,
+							 (__v8hi) __A,
+							 (__v8hi) __B,
+							 (__mmask8)
+							 __U);
+}
+
+/* Masked multiply-add families:
+   maddubs (vpmaddubsw): multiply unsigned bytes of __X by signed
+   bytes of __Y and add horizontal pairs with signed saturation,
+   producing 16-bit results.
+   madd (vpmaddwd): multiply signed 16-bit pairs and add adjacent
+   products into 32-bit results (note the __mmask8 over 8 dwords for
+   the 256-bit form).
+   Consistency fix: _mm_maskz_maddubs_epi16 used the nonstandard
+   _mm_setzero_hi (); use _mm_setzero_si128 () as the sibling madd
+   forms already do (same all-zero __m128i value).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_maddubs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
+ (__v32qi) __Y,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_maddubs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
+ (__v32qi) __Y,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_maddubs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
+ (__v16qi) __Y,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
+ (__v16qi) __Y,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_madd_epi16 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_madd_epi16 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_madd_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_madd_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Vector <-> mask conversions.
+   movepi*_mask (vpmovb2m / vpmovw2m): collect the sign bit of each
+   byte/word element into a mask register.
+   movm_epi* (vpmovm2b / vpmovm2w): broadcast each mask bit into an
+   all-ones or all-zeros element.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi8_mask (__m128i __A)
+{
+ return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movepi8_mask (__m256i __A)
+{
+ return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi16_mask (__m128i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movepi16_mask (__m256i __A)
+{
+ return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movm_epi8 (__mmask16 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movm_epi8 (__mmask32 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movm_epi16 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movm_epi16 (__mmask16 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
+}
+
+/* vptestmb / vptestmw: bitwise-AND the two operands and set the
+   result mask bit for every nonzero element.  The _mask_ variants
+   additionally AND the result with the incoming mask __U (zeroing
+   masking on the compare).  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_epi8_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
+ (__v16qi) __B, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_test_epi8_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
+ (__v32qi) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_epi16_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
+ (__v8hi) __B, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_test_epi16_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
+ (__v16hi) __B, __U);
+}
+
+/* Masked element-wise min/max for signed/unsigned bytes and words
+   (vpmin[su][bw] / vpmax[su][bw]).  _mask variants merge masked-off
+   lanes from __W; _maskz variants zero them.
+   Consistency fix: the 128-bit maskz forms zeroed through the
+   nonstandard _mm_setzero_di () (a DI-vector zero cast to the
+   byte/word vector type); use the standard _mm_setzero_si128 ()
+   as the rest of this header does — the value (all-zero __m128i)
+   is identical.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+#ifdef __OPTIMIZE__
+/* vpalignr with merge/zero masking: concatenate each 128-bit lane
+   pair of __A:__B and extract a byte-shifted window.  __N is the
+   byte shift; the builtin takes a bit count, hence __N * 8.  These
+   inline forms require __OPTIMIZE__ so __N folds to a constant.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_alignr_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B, const int __N)
+{
+ return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
+ (__v4di) __B,
+ __N * 8,
+ (__v4di) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_alignr_epi8 (__mmask32 __U, __m256i __A, __m256i __B,
+ const int __N)
+{
+ return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
+ (__v4di) __B,
+ __N * 8,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_alignr_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B, const int __N)
+{
+ return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
+ (__v2di) __B,
+ __N * 8,
+ (__v2di) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_alignr_epi8 (__mmask16 __U, __m128i __A, __m128i __B,
+ const int __N)
+{
+ return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
+ (__v2di) __B,
+ __N * 8,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+/* vdbpsadbw: double-block packed sum of absolute differences of
+   unsigned bytes; __imm selects the 32-bit block shuffle of __B
+   before the SAD, results are 16-bit sums.  Inline forms need
+   __OPTIMIZE__ so __imm is a compile-time constant.
+   Consistency fix: _mm_dbsad_epu8 zeroed via the nonstandard
+   _mm_setzero_hi (); use _mm_setzero_si128 () as the maskz form
+   below already does (same all-zero __m128i value).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dbsad_epu8 (__m256i __A, __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_dbsad_epu8 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ __imm,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_dbsad_epu8 (__mmask16 __U, __m256i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dbsad_epu8 (__m128i __A, __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ __imm,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_dbsad_epu8 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ __imm,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ __imm,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Mask blend (vpblendmb / vpblendmw): per element, select from __W
+   where the mask bit in __U is set, otherwise from __A.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+ return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
+{
+ return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
+{
+ return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
+{
+ return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+/* vpcmpw: signed 16-bit compare with predicate __P (0..7: eq, lt,
+   le, false, ne, nlt, nle, true), producing one mask bit per
+   element; the _mask_ variants AND the result with __U.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epi16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epi16_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epi16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, __P,
+ (__mmask16) __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epi16_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, __P,
+ (__mmask16) -1);
+}
+
+/* vpcmpb: signed byte compare with predicate __P, one mask bit per
+   byte.  Bug fixes vs. the original: the byte forms operate on 16
+   (128-bit) or 32 (256-bit) elements, so the incoming mask __U must
+   be __mmask16/__mmask32 (it was __mmask8/__mmask16, silently
+   dropping the upper compare lanes), and _mm256_cmp_epi8_mask must
+   be declared __mmask32 (it was __mmask16, truncating 16 results).  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epi8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, __P,
+ (__mmask16) __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epi8_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, __P,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epi8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, __P,
+ (__mmask32) __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, __P,
+ (__mmask32) -1);
+}
+
+/* vpcmpuw: unsigned 16-bit compare with predicate __P; same
+   predicate encoding as the signed forms above.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epu16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epu16_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epu16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, __P,
+ (__mmask16) __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epu16_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, __P,
+ (__mmask16) -1);
+}
+
+/* vpcmpub: unsigned byte compare with predicate __P.  Bug fixes vs.
+   the original, mirroring the signed byte forms: __U must be
+   __mmask16 (128-bit) / __mmask32 (256-bit) to cover all compare
+   lanes, and _mm256_cmp_epu8_mask must be declared __mmask32 so the
+   upper 16 result bits are not truncated.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epu8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, __P,
+ (__mmask16) __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epu8_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, __P,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epu8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, __P,
+ (__mmask32) __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, __P,
+ (__mmask32) -1);
+}
+
+/* Masked immediate word shifts (vpsrlw/vpsraw/vpsllw with imm8) and
+   high/low quadword word shuffles (vpshufhw/vpshuflw) — inline forms
+   used under __OPTIMIZE__ so the immediate folds to a constant.
+   Consistency fix: the maskz shufflehi/shufflelo 128-bit forms
+   zeroed via the nonstandard _mm_setzero_hi (); use
+   _mm_setzero_si128 () as the shift forms already do (same all-zero
+   __m128i value).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srli_epi16 (__mmask16 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srli_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shufflehi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
+ __imm,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shufflehi_epi16 (__mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
+ __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shufflehi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shufflehi_epi16 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shufflelo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
+ __imm,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shufflelo_epi16 (__mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
+ __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shufflelo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shufflelo_epi16 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srai_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srai_epi16 (__mmask16 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srai_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srai_epi16 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_slli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ int __B)
+{
+ return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_slli_epi16 (__mmask16 __U, __m256i __A, int __B)
+{
+ return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_slli_epi16 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+#else
+/* Concatenate each 128-bit lane of Y:X, byte-shift right by N, and
+   merge the result with W under mask U.  The pass-through (merge)
+   operand of the builtin must be W -- the old destination -- not X;
+   N is parenthesized before scaling to bits for macro hygiene.  */
+#define _mm256_mask_alignr_epi8(W, U, X, Y, N) \
+ ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)((N) * 8), \
+ (__v4di)(__m256i)(W), (__mmask32)(U)))
+
+/* Immediate-count 16-bit logical/arithmetic right-shift macros
+   (used when the count must be folded as a compile-time constant).  */
+#define _mm256_mask_srli_epi16(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A), \
+ (int)(B), (__v16hi)(__m256i)(W), (__mmask16)(U)))
+
+#define _mm256_maskz_srli_epi16(U, A, B) \
+ ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A), \
+ (int)(B), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(U)))
+
+#define _mm_mask_srli_epi16(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A), \
+ (int)(B), (__v8hi)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srli_epi16(U, A, B) \
+ ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A), \
+ (int)(B), (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))
+
+#define _mm256_mask_srai_epi16(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A), \
+ (int)(B), (__v16hi)(__m256i)(W), (__mmask16)(U)))
+
+#define _mm256_maskz_srai_epi16(U, A, B) \
+ ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A), \
+ (int)(B), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(U)))
+
+#define _mm_mask_srai_epi16(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A), \
+ (int)(B), (__v8hi)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srai_epi16(U, A, B) \
+ ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A), \
+ (int)(B), (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))
+
+/* Masked high/low-word shuffle macros; B is the 8-bit immediate
+   selector.  Maskz 128-bit forms use _mm_setzero_hi () as the
+   zero source (branch-local helper).  */
+#define _mm256_mask_shufflehi_epi16(W, U, A, B) \
+ ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), (int)(B), \
+ (__v16hi)(__m256i)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_shufflehi_epi16(U, A, B) \
+ ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), (int)(B), \
+ (__v16hi)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask16)(U)))
+
+#define _mm_mask_shufflehi_epi16(W, U, A, B) \
+ ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), (int)(B), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_shufflehi_epi16(U, A, B) \
+ ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), (int)(B), \
+ (__v8hi)(__m128i)_mm_setzero_hi(), \
+ (__mmask8)(U)))
+
+#define _mm256_mask_shufflelo_epi16(W, U, A, B) \
+ ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A), (int)(B), \
+ (__v16hi)(__m256i)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_shufflelo_epi16(U, A, B) \
+ ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A), (int)(B), \
+ (__v16hi)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask16)(U)))
+
+#define _mm_mask_shufflelo_epi16(W, U, A, B) \
+ ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), (int)(B), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_shufflelo_epi16(U, A, B) \
+ ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), (int)(B), \
+ (__v8hi)(__m128i)_mm_setzero_hi(), \
+ (__mmask8)(U)))
+
+/* Zero-masking byte-align: concatenate each lane of Y:X, shift right
+   by N bytes, zero elements where U is clear.  N is parenthesized
+   before scaling so expressions like _mm256_maskz_alignr_epi8(u, x, y,
+   1 + 1) expand correctly.  */
+#define _mm256_maskz_alignr_epi8(U, X, Y, N) \
+ ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)((N) * 8), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask32)(U)))
+
+/* Merge-masking byte-align (128-bit): concatenate Y:X, shift right by
+   N bytes, merge with W under mask U.  The merge operand must be W,
+   not X; N is parenthesized before scaling to bits.  */
+#define _mm_mask_alignr_epi8(W, U, X, Y, N) \
+ ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)((N) * 8), \
+ (__v2di)(__m128i)(W), (__mmask16)(U)))
+
+/* Zero-masking byte-align (128-bit).  N is parenthesized before the
+   * 8 scaling for macro hygiene.  */
+#define _mm_maskz_alignr_epi8(U, X, Y, N) \
+ ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)((N) * 8), \
+ (__v2di)(__m128i)_mm_setzero_di(), \
+ (__mmask16)(U)))
+
+/* Immediate-count left-shift and double-block SAD (VDBPSADBW) macros.
+   C is the immediate shift count / byte-select control respectively.  */
+#define _mm_mask_slli_epi16(W, U, X, C) \
+ ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\
+ (__v8hi)(__m128i)(W),\
+ (__mmask8)(U)))
+
+#define _mm_maskz_slli_epi16(U, X, C) \
+ ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\
+ (__v8hi)(__m128i)_mm_setzero_hi(),\
+ (__mmask8)(U)))
+
+#define _mm256_dbsad_epu8(X, Y, C) \
+ ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X), \
+ (__v32qi)(__m256i) (Y), (int) (C), \
+ (__v16hi)(__m256i)_mm256_setzero_si256(),\
+ (__mmask16)-1))
+
+#define _mm256_mask_slli_epi16(W, U, X, C) \
+ ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), (int)(C),\
+ (__v16hi)(__m256i)(W),\
+ (__mmask16)(U)))
+
+#define _mm256_maskz_slli_epi16(U, X, C) \
+ ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), (int)(C),\
+ (__v16hi)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask16)(U)))
+
+#define _mm256_mask_dbsad_epu8(W, U, X, Y, C) \
+ ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X), \
+ (__v32qi)(__m256i) (Y), (int) (C), \
+ (__v16hi)(__m256i)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_dbsad_epu8(U, X, Y, C) \
+ ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X), \
+ (__v32qi)(__m256i) (Y), (int) (C), \
+ (__v16hi)(__m256i)_mm256_setzero_si256(),\
+ (__mmask16)(U)))
+
+#define _mm_dbsad_epu8(X, Y, C) \
+ ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X), \
+ (__v16qi)(__m128i) (Y), (int) (C), \
+ (__v8hi)(__m128i)_mm_setzero_si128(), \
+ (__mmask8)-1))
+
+#define _mm_mask_dbsad_epu8(W, U, X, Y, C) \
+ ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X), \
+ (__v16qi)(__m128i) (Y), (int) (C), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_dbsad_epu8(U, X, Y, C) \
+ ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X), \
+ (__v16qi)(__m128i) (Y), (int) (C), \
+ (__v8hi)(__m128i)_mm_setzero_si128(), \
+ (__mmask8)(U)))
+
+/* Masked blend macros: select element-wise from __W where the mask bit
+   is set, from __A where it is clear.  */
+#define _mm_mask_blend_epi16(__U, __A, __W) \
+ ((__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) (__A), \
+ (__v8hi) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm_mask_blend_epi8(__U, __A, __W) \
+ ((__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) (__A), \
+ (__v16qi) (__W), \
+ (__mmask16) (__U)))
+
+#define _mm256_mask_blend_epi16(__U, __A, __W) \
+ ((__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) (__A), \
+ (__v16hi) (__W), \
+ (__mmask16) (__U)))
+
+#define _mm256_mask_blend_epi8(__U, __A, __W) \
+ ((__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) (__A), \
+ (__v32qi) (__W), \
+ (__mmask32) (__U)))
+
+/* Unmasked compare macros; P is the _MM_CMPINT_* predicate immediate.
+   The all-ones trailing mask selects every element.  */
+#define _mm_cmp_epi16_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(P),\
+ (__mmask8)(-1)))
+
+#define _mm_cmp_epi8_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(P),\
+ (__mmask16)(-1)))
+
+#define _mm256_cmp_epi16_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi)(__m256i)(X), \
+ (__v16hi)(__m256i)(Y), (int)(P),\
+ (__mmask16)(-1)))
+
+#define _mm256_cmp_epi8_mask(X, Y, P) \
+ ((__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(P),\
+ (__mmask32)(-1)))
+
+#define _mm_cmp_epu16_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(P),\
+ (__mmask8)(-1)))
+
+#define _mm_cmp_epu8_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(P),\
+ (__mmask16)(-1)))
+
+#define _mm256_cmp_epu16_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi)(__m256i)(X), \
+ (__v16hi)(__m256i)(Y), (int)(P),\
+ (__mmask16)(-1)))
+
+#define _mm256_cmp_epu8_mask(X, Y, P) \
+ ((__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(P),\
+ (__mmask32)-1))
+
+/* Masked 8x16-bit signed compare: only elements selected by M take
+   part.  The builtin's mask operand is 8 bits wide, so M is cast to
+   __mmask8 (matching _mm_mask_cmp_epu16_mask), not __mmask16.  */
+#define _mm_mask_cmp_epi16_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+/* Masked compare macros; M pre-selects the participating elements and
+   P is the _MM_CMPINT_* predicate immediate.  */
+#define _mm_mask_cmp_epi8_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(P),\
+ (__mmask16)(M)))
+
+#define _mm256_mask_cmp_epi16_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi)(__m256i)(X), \
+ (__v16hi)(__m256i)(Y), (int)(P),\
+ (__mmask16)(M)))
+
+#define _mm256_mask_cmp_epi8_mask(M, X, Y, P) \
+ ((__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(P),\
+ (__mmask32)(M)))
+
+#define _mm_mask_cmp_epu16_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_epu8_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(P),\
+ (__mmask16)(M)))
+
+#define _mm256_mask_cmp_epu16_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi)(__m256i)(X), \
+ (__v16hi)(__m256i)(Y), (int)(P),\
+ (__mmask16)(M)))
+
+/* Masked 32x8-bit unsigned compare.  M is parenthesized in the cast
+   for macro hygiene -- (__mmask32)M would bind only to the first
+   operand of a lower-precedence mask expression.  */
+#define _mm256_mask_cmp_epu8_mask(M, X, Y, P) \
+ ((__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(P),\
+ (__mmask32)(M)))
+#endif
+
+/* Convenience compare wrappers.  The immediate predicate codes are the
+   _MM_CMPINT_* values: 1 = LT, 2 = LE, 4 = NE, 5 = GE (NLT); the
+   trailing -1 mask selects all elements.  */
+extern __inline __mmask32
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epi8_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 4,
+ (__mmask32) - 1);
+}
+
+extern __inline __mmask32
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epi8_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 1,
+ (__mmask32) - 1);
+}
+
+extern __inline __mmask32
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epi8_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 5,
+ (__mmask32) - 1);
+}
+
+extern __inline __mmask32
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epi8_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 2,
+ (__mmask32) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epi16_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 4,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epi16_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 1,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epi16_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 5,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epi16_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 2,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epu8_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, 4,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epu8_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, 1,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epu8_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, 5,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epu8_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, 2,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epu16_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, 4,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epu16_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, 1,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epu16_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, 5,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epu16_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, 2,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epi8_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, 4,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi8_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, 1,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epi8_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, 5,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epi8_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, 2,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epi16_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, 4,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi16_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, 1,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epi16_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, 5,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epi16_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, 2,
+ (__mmask8) - 1);
+}
+
+/* Masked 16-bit multiply wrappers: mulhrs (round-scale high), mulhi
+   (signed/unsigned high half), mullo (low half).  "mask" forms merge
+   with __W under __U; "maskz" forms zero inactive elements.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mulhrs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
+ (__v16hi) __Y,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mulhrs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
+ (__v16hi) __Y,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mulhi_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mulhi_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mulhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mulhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mulhi_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
+ (__v8hi) __Y,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
+ (__v8hi) __Y,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+/* Sign-extend 16 bytes to 16 words, merging with __W under __U.  The
+   operation has 16 elements, so the mask parameter is __mmask16 (the
+   documented intrinsic signature); declaring it __mmask32 silently
+   truncated the caller's mask at the cast below.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+/* Sign-extend 16 bytes to 16 words, zeroing elements not in __U.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+/* Sign-extend the low 8 bytes to 8 words, merging with __W under __U.
+   Only 8 elements take part, so the mask parameter is __mmask8 (the
+   documented intrinsic signature), not __mmask32.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+/* Sign-extend the low 8 bytes to 8 words, zeroing elements not in __U.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Zero-extend 16 bytes to 16 words, merging with __W under __U.  The
+   operation has 16 elements, so the mask parameter is __mmask16 (the
+   documented intrinsic signature), not __mmask32.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+/* Zero-extend 16 bytes to 16 words, zeroing elements not in __U.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+/* Zero-extend the low 8 bytes to 8 words, merging with __W under __U.
+   Only 8 elements take part, so the mask parameter is __mmask8 (the
+   documented intrinsic signature), not __mmask32.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+/* Zero-extend the low 8 bytes to 8 words, zeroing elements not in __U.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu8_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Masked byte/word arithmetic wrappers: rounded average (pavg),
+   wrapping add/sub (padd/psub), and saturating add/sub, signed
+   (padds/psubs) and unsigned (paddus/psubus).  "mask" forms merge with
+   __W under __U; "maskz" forms zero inactive elements.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_avg_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_avg_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_avg_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_avg_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_adds_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_adds_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_adds_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_adds_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_subs_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_subs_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_subs_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_subs_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Masked byte/word interleave: unpackhi takes the high halves of each
+   128-bit lane, unpacklo the low halves (VPUNPCKH/LBW, VPUNPCKH/LWD).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Byte/word equality and signed greater-than compares returning a mask
+   register value; the "-1" final argument means "all lanes participate",
+   while the mask_ variants AND the result with the caller's mask __U.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi8_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpeq_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi8_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpeq_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi16_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpeq_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
+ (__v8hi) __B, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi16_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpeq_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi8_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpgt_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi8_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpgt_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi16_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpgt_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
+ (__v8hi) __B, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi16_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpgt_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ __U);
+}
+
+/* VPTESTNMB/VPTESTNMW wrappers: per-lane mask bit set when (__A & __B)
+   is zero; mask_ variants additionally AND with __U.  */
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testn_epi8_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
+ (__v16qi) __B, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
+ (__v32qi) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testn_epi16_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
+ (__v8hi) __B, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
+ (__v16hi) __B, __U);
+}
+
+/* Write-masked VPSHUFB (byte shuffle within 128-bit lanes).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shuffle_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+/* Write-masked pack-with-saturation: packs_ = signed (VPACKSSWB),
+   packus_ = unsigned (VPACKUSWB); words in, bytes out.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_packs_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi) __W,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_packs_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi) __W,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_packus_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_packus_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi) __W,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_packus_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi) __W,
+ __M);
+}
+
+/* Write-masked absolute value of packed bytes/words (VPABSB/VPABSW).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_abs_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_abs_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_abs_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_abs_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Unsigned byte/word compares via VPCMPUB/VPCMPUW; the immediate selects
+   the predicate: 1 = LT, 2 = LE, 4 = NEQ, 5 = GE (NLT).  */
+extern __inline __mmask32
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epu8_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 4,
+ (__mmask32) - 1);
+}
+
+extern __inline __mmask32
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epu8_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 1,
+ (__mmask32) - 1);
+}
+
+extern __inline __mmask32
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epu8_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 5,
+ (__mmask32) - 1);
+}
+
+extern __inline __mmask32
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epu8_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, 2,
+ (__mmask32) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epu16_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 4,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epu16_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 1,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epu16_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 5,
+ (__mmask16) - 1);
+}
+
+extern __inline __mmask16
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epu16_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, 2,
+ (__mmask16) - 1);
+}
+
+/* Masked unaligned stores of packed words (VMOVDQU16): only lanes whose
+   mask bit is set are written to *__P.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
+{
+ __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
+ (__v16hi) __A,
+ (__mmask16) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
+ (__v8hi) __A,
+ (__mmask8) __U);
+}
+
+/* 128-bit write-masked saturating add/subtract (signed and unsigned
+   bytes/words).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_adds_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_subs_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_subs_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_subs_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_subs_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Write-masked word shifts by the scalar count in __B's low 64 bits:
+   srl = logical right (VPSRLW), sra = arithmetic right (VPSRAW).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srl_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srl_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sra_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sra_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sra_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sra_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* 128-bit write-masked saturating adds (signed/unsigned) and plain
+   byte/word subtract.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_adds_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_adds_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_adds_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+/* Convert packed 16-bit integers in __A to packed 8-bit integers with
+   truncation (VPMOVWB).  The pass-through operand is ignored under the
+   all-ones mask, but the original code passed an uninitialized local
+   (__v16qi __O) to the builtin, which is undefined behavior in C and
+   trips -Wuninitialized / -Wmaybe-uninitialized; pass a well-defined
+   zero vector instead, matching the maskz variant below.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+/* Masked variants of the word-to-byte truncating convert (VPMOVWB):
+   merge into __O, or zero unselected bytes.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Per-element arithmetic right shift of words by variable counts in __B
+   (VPSRAVW), plain / merge-masked / zero-masked.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srav_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srav_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srav_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+/* Per-element arithmetic right shift of words by variable counts in __B
+   (VPSRAVW).  Use _mm_setzero_si128 for the (ignored, all-ones-mask)
+   pass-through operand: every other 128-bit wrapper in this file spells
+   the zero vector that way, and the nonstandard _mm_setzero_hi helper
+   is an avoidable dependency — the two produce the identical value.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srav_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+/* Merge- and zero-masked 128-bit variable arithmetic word shifts.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srav_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srav_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Per-element logical right shift of words by variable counts in __B
+   (VPSRLVW), plain / merge-masked / zero-masked.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srlv_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srlv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srlv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srlv_epi16 (__m128i __A, __m128i __B)
+{
+  /* Use _mm_setzero_si128 for the pass-through operand, consistent
+     with the maskz variants in this file.  */
+  return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
+						 (__v8hi) __B,
+						 (__v8hi)
+						 _mm_setzero_si128 (),
+						 (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srlv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srlv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sllv_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sllv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sllv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sllv_epi16 (__m128i __A, __m128i __B)
+{
+  /* Use _mm_setzero_si128 for the pass-through operand, consistent
+     with the maskz variants in this file.  */
+  return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
+						 (__v8hi) __B,
+						 (__v8hi)
+						 _mm_setzero_si128 (),
+						 (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sllv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sllv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sll_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sll_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sll_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_packus_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi) __W,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+/* The 128-bit result has only eight word elements, so the write mask
+   is __mmask8, matching _mm_maskz_packus_epi32 above.  */
+_mm_mask_packus_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+		       __m128i __B)
+{
+  return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
+						    (__v4si) __B,
+						    (__v8hi) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_packs_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi) __W,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+/* The 128-bit result has only eight word elements, so the write mask
+   is __mmask8, matching _mm_maskz_packs_epi32 above.  */
+_mm_mask_packs_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+		      __m128i __B)
+{
+  return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
+						    (__v4si) __B,
+						    (__v8hi) __W, __M);
+}
+
+#ifdef __DISABLE_AVX512VLBW__
+#undef __DISABLE_AVX512VLBW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VLBW__ */
+
+#endif /* _AVX512VLBWINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h
new file mode 100644
index 00000000000..bff3ead8637
--- /dev/null
+++ b/gcc/config/i386/avx512vldqintrin.h
@@ -0,0 +1,2035 @@
+/* Copyright (C) 2014
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VLDQINTRIN_H_INCLUDED
+#define _AVX512VLDQINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
+#pragma GCC push_options
+#pragma GCC target("avx512vl,avx512dq")
+#define __DISABLE_AVX512VLDQ__
+#endif /* __AVX512VLDQ__ */
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttpd_epi64 (__m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_epi64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttpd_epu64 (__m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_epu64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_epi64 (__m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_epi64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_epu64 (__m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_epu64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttps_epi64 (__m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_epi64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
+{
+  /* Zero source for masked-off lanes; use _mm_setzero_si128 like the
+     other maskz intrinsics in this file.  */
+  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
+						     (__v2di)
+						     _mm_setzero_si128 (),
+						     (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttps_epu64 (__m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_epu64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
+{
+  /* Zero source for masked-off lanes; use _mm_setzero_si128 like the
+     other maskz intrinsics in this file.  */
+  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
+						      (__v2di)
+						      _mm_setzero_si128 (),
+						      (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_f64x2 (__m128d __A)
+{
+  /* Pass a zero vector rather than an uninitialized local: reading an
+     uninitialized object is undefined behavior and triggers
+     -Wmaybe-uninitialized in user code.  */
+  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
+							   (__v4df)
+							   _mm256_setzero_pd (),
+							   (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
+{
+ return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
+ __A,
+ (__v4df)
+ __O, __M);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
+{
+  /* The result is a double vector, so build the zero source with
+     _mm256_setzero_pd instead of casting the ps zero.  */
+  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
+							   (__v4df)
+							   _mm256_setzero_pd (),
+							   __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_i64x2 (__m128i __A)
+{
+  /* Pass a zero vector rather than an uninitialized local: reading an
+     uninitialized object is undefined behavior.  */
+  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
+							   (__v4di)
+							   _mm256_setzero_si256 (),
+							   (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
+ __A,
+ (__v4di)
+ __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
+ __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_f32x2 (__m128 __A)
+{
+  /* Pass a zero vector rather than an uninitialized local (UB), and
+     use __mmask8 for the eight-element result, matching the masked
+     variants below.  */
+  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
+							  (__v8sf)
+							  _mm256_setzero_ps (),
+							  (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
+ (__v8sf) __O,
+ __M);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_i32x2 (__m128i __A)
+{
+  /* Pass a zero vector rather than an uninitialized local: reading an
+     uninitialized object is undefined behavior.  */
+  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
+							   (__v8si)
+							   _mm256_setzero_si256 (),
+							   (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
+ __A,
+ (__v8si)
+ __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
+ __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcast_i32x2 (__m128i __A)
+{
+  /* Pass a zero vector rather than an uninitialized local: reading an
+     uninitialized object is undefined behavior.  */
+  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
+							   (__v4si)
+							   _mm_setzero_si128 (),
+							   (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
+ __A,
+ (__v4si)
+ __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
+ __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mullo_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_epi64 (__m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epi64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
+{
+  /* Zero source for masked-off lanes; use _mm_setzero_si128 like the
+     other maskz intrinsics in this file.  */
+  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
+						    (__v2di)
+						    _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_epu64 (__m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epu64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
+{
+  /* Zero source for masked-off lanes; use _mm_setzero_si128 like the
+     other maskz intrinsics in this file.  */
+  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
+						     (__v2di)
+						     _mm_setzero_si128 (),
+						     (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_ps (__m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_ps (__m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu64_ps (__m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu64_ps (__m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_pd (__m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_pd (__m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu64_pd (__m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu64_pd (__m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movm_epi32 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movm_epi32 (__mmask8 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movm_epi64 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movm_epi64 (__mmask8 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi32_mask (__m128i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movepi32_mask (__m256i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi64_mask (__m128i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movepi64_mask (__m256i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf64x2_pd (__m256d __A, const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
+ __imm,
+ (__v2df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
+ __imm,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
+ __imm,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
+ __imm,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_pd (__m256d __A, int __B)
+{
+ return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
+{
+ return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
+{
+ return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_pd (__m128d __A, int __B)
+{
+ return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
+{
+ return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
+{
+ return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_ps (__m256 __A, int __B)
+{
+ return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
+{
+ return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
+{
+ return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_ps (__m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_range_pd (__m256d __A, __m256d __B, int __C)
+{
+ return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
+ (__v4df) __B, __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_range_pd (__m256d __W, __mmask8 __U,
+ __m256d __A, __m256d __B, int __C)
+{
+ return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
+ (__v4df) __B, __C,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
+{
+ return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
+ (__v4df) __B, __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_pd (__m128d __A, __m128d __B, int __C)
+{
+ return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
+ (__v2df) __B, __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_range_pd (__m128d __W, __mmask8 __U,
+ __m128d __A, __m128d __B, int __C)
+{
+ return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
+ (__v2df) __B, __C,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
+{
+ return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
+ (__v2df) __B, __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_range_ps (__m256 __A, __m256 __B, int __C)
+{
+ return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
+ (__v8sf) __B, __C,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
+ int __C)
+{
+ return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
+ (__v8sf) __B, __C,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
+{
+ return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
+ (__v8sf) __B, __C,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_ps (__m128 __A, __m128 __B, int __C)
+{
+ return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_range_ps (__m128 __W, __mmask8 __U,
+ __m128 __A, __m128 __B, int __C)
+{
+ return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
+{
+ return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
+ const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fpclass_pd_mask (__m256d __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
+ __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fpclass_ps_mask (__m256 __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
+ __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fpclass_pd_mask (__m128d __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
+ __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fpclass_ps_mask (__m128 __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
+ __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v4di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
+{
+ return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
+ __m128d __B, const int __imm)
+{
+ return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v4df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+#else
+#define _mm256_insertf64x2(X, Y, C) \
+ ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_insertf64x2(W, U, X, Y, C) \
+ ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_insertf64x2(U, X, Y, C) \
+ ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm256_inserti64x2(X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask8)-1))
+
+#define _mm256_mask_inserti64x2(W, U, X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_inserti64x2(U, X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask8)(U)))
+
+#define _mm256_extractf64x2_pd(X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))
+
+#define _mm256_mask_extractf64x2_pd(W, U, X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))
+
+#define _mm256_maskz_extractf64x2_pd(U, X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
+
+#define _mm256_extracti64x2_epi64(X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))
+
+#define _mm256_mask_extracti64x2_epi64(W, U, X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))
+
+#define _mm256_maskz_extracti64x2_epi64(U, X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))
+
+#define _mm256_reduce_pd(A, B) \
+ ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
+
+#define _mm256_mask_reduce_pd(W, U, A, B) \
+ ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_reduce_pd(U, A, B) \
+ ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
+
+#define _mm_reduce_pd(A, B) \
+ ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))
+
+#define _mm_mask_reduce_pd(W, U, A, B) \
+ ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_reduce_pd(U, A, B) \
+ ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
+
+#define _mm256_reduce_ps(A, B) \
+ ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
+
+#define _mm256_mask_reduce_ps(W, U, A, B) \
+ ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_reduce_ps(U, A, B) \
+ ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
+
+#define _mm_reduce_ps(A, B) \
+ ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
+
+#define _mm_mask_reduce_ps(W, U, A, B) \
+ ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
+
+#define _mm_maskz_reduce_ps(U, A, B) \
+ ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
+
+#define _mm256_range_pd(A, B, C) \
+ ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
+
+#define _mm256_maskz_range_pd(U, A, B, C) \
+ ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
+
+#define _mm_range_pd(A, B, C) \
+ ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)_mm_setzero_pd(), (__mmask8)-1))
+
+#define _mm256_range_ps(A, B, C) \
+ ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
+
+#define _mm256_mask_range_ps(W, U, A, B, C) \
+ ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)(__m256)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_range_ps(U, A, B, C) \
+ ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
+
+#define _mm_range_ps(A, B, C) \
+ ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
+
+#define _mm_mask_range_ps(W, U, A, B, C) \
+ ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)(__m128)(W), (__mmask8)(U)))
+
+#define _mm_maskz_range_ps(U, A, B, C) \
+ ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
+
+#define _mm256_mask_range_pd(W, U, A, B, C) \
+ ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)(__m256d)(W), (__mmask8)(U)))
+
+#define _mm_mask_range_pd(W, U, A, B, C) \
+ ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_range_pd(U, A, B, C) \
+ ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
+
+#define _mm256_mask_fpclass_pd_mask(u, X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
+ (int) (C),(__mmask8)(u)))
+
+#define _mm256_mask_fpclass_ps_mask(u, X, C) \
+ ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
+ (int) (C),(__mmask8)(u)))
+
+#define _mm_mask_fpclass_pd_mask(u, X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
+ (int) (C),(__mmask8)(u)))
+
+#define _mm_mask_fpclass_ps_mask(u, X, C) \
+ ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
+ (int) (C),(__mmask8)(u)))
+
+#define _mm256_fpclass_pd_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
+ (int) (C),(__mmask8)-1))
+
+#define _mm256_fpclass_ps_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
+ (int) (C),(__mmask8)-1))
+
+#define _mm_fpclass_pd_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
+ (int) (C),(__mmask8)-1))
+
+#define _mm_fpclass_ps_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
+ (int) (C),(__mmask8)-1))
+
+#endif
+
+#ifdef __DISABLE_AVX512VLDQ__
+#undef __DISABLE_AVX512VLDQ__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VLDQ__ */
+
+#endif /* _AVX512VLDQINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
new file mode 100644
index 00000000000..5c45e8b5d9b
--- /dev/null
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -0,0 +1,13213 @@
+/* Copyright (C) 2014
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VLINTRIN_H_INCLUDED
+#define _AVX512VLINTRIN_H_INCLUDED
+
+/* Doesn't require avx512vl target and is used in avx512dqintrin.h. */
+/* Return a 128-bit vector with both 64-bit ("di") elements zero.
+   Deliberately defined BEFORE the target("avx512vl") pragma below, so
+   callers that are not compiled with -mavx512vl can still use it.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_di (void)
+{
+ return __extension__ (__m128i)(__v2di){ 0, 0};
+}
+
+#ifndef __AVX512VL__
+#pragma GCC push_options
+#pragma GCC target("avx512vl")
+#define __DISABLE_AVX512VL__
+#endif /* __AVX512VL__ */
+
+/* Internal data types for implementing the intrinsics. */
+/* One mask bit per element of a 32-element vector.
+   NOTE(review): only used by the BW-flavoured intrinsics; presumably
+   declared here so the VL/BW headers share one definition — confirm
+   against avx512bwintrin.h.  */
+typedef unsigned int __mmask32;
+
+/* Merge-masking move: copy elements of __A into the lanes of the
+   result selected by mask __U; lanes with a zero mask bit keep the
+   corresponding value from __W (masked VMOVAPD, per the builtin).  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_pd (__mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
+{
+ __builtin_ia32_storeapd256_mask ((__v4df *) __P,
+ (__v4df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
+{
+ __builtin_ia32_storeapd128_mask ((__v2df *) __P,
+ (__v2df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_ps (__mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
+{
+ __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
+ (__v8sf) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
+{
+ __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
+ (__v4sf) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
+ (__v4di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
+ (__v4di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
+ (__v2di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
+ (__v8si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
+ (__v4si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
+ (__v8si) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
+ (__v4si) __A,
+ (__mmask8) __U);
+}
+
+/* Return a 128-bit vector with all eight 16-bit ("hi") elements
+   zero.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_hi (void)
+{
+  return __extension__ (__m128i) (__v8hi) { 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/* Add packed double-precision elements of __A and __B.  Result lanes
+   selected by mask __U receive the sum; the remaining lanes are
+   copied from __W (merge-masking — contrast the maskz variant below,
+   which substitutes zero).  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* Add packed single-precision elements of __A and __B; lanes selected
+   by mask __U receive the sum, the rest are copied from __W.
+   Fix: mask parameter is __mmask8, not __mmask16 — a 128-bit ps
+   vector has only 4 lanes, the builtin already truncates to
+   (__mmask8), and the Intel intrinsic definition (and the _pd
+   variants in this file) use __mmask8.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+/* Add packed single-precision elements of __A and __B; lanes selected
+   by mask __U receive the sum, the rest are zeroed.
+   Fix: mask parameter is __mmask8, not __mmask16 (4 lanes; matches
+   the Intel intrinsic definition and the _pd variants).  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Add packed single-precision elements of __A and __B; lanes selected
+   by mask __U receive the sum, the rest are copied from __W.
+   Fix: mask parameter is __mmask8, not __mmask16 — a 256-bit ps
+   vector has 8 lanes and the builtin truncates to (__mmask8);
+   matches the Intel intrinsic definition.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+/* Add packed single-precision elements of __A and __B; lanes selected
+   by mask __U receive the sum, the rest are zeroed.
+   Fix: mask parameter is __mmask8, not __mmask16 (8 lanes; matches
+   the Intel intrinsic definition).  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* Subtract packed single-precision elements of __B from __A; lanes
+   selected by mask __U receive the difference, the rest are copied
+   from __W.
+   Fix: mask parameter is __mmask8, not __mmask16 (4 lanes; matches
+   the Intel intrinsic definition and the _pd variants).  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+/* Subtract packed single-precision elements of __B from __A; lanes
+   selected by mask __U receive the difference, the rest are zeroed.
+   Fix: mask parameter is __mmask8, not __mmask16 (4 lanes; matches
+   the Intel intrinsic definition).  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Subtract packed single-precision elements of __B from __A; lanes
+   selected by mask __U receive the difference, the rest are copied
+   from __W.
+   Fix: mask parameter is __mmask8, not __mmask16 (8 lanes; matches
+   the Intel intrinsic definition).  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+/* Subtract packed single-precision elements of __B from __A; lanes
+   selected by mask __U receive the difference, the rest are zeroed.
+   Fix: mask parameter is __mmask8, not __mmask16 (8 lanes; matches
+   the Intel intrinsic definition).  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_epi64 (void const *__P)
+{
+ return *(__m256i *) __P;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_epi64 (void const *__P)
+{
+ return *(__m128i *) __P;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_epi64 (void *__P, __m256i __A)
+{
+ *(__m256i *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_epi64 (void *__P, __m128i __A)
+{
+ *(__m128i *) __P = __A;
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
+{
+ __builtin_ia32_storeupd256_mask ((__v4df *) __P,
+ (__v4df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
+{
+ __builtin_ia32_storeupd128_mask ((__v2df *) __P,
+ (__v2df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
+{
+ __builtin_ia32_storeups256_mask ((__v8sf *) __P,
+ (__v8sf) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
+{
+ __builtin_ia32_storeups128_mask ((__v4sf *) __P,
+ (__v4sf) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
+ (__v4di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
+ (__v2di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
+ (__v8si) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
+ (__v4si) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_abs_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_epu32 (__m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_epu32 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttps_epu32 (__m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_epu32 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttpd_epu32 (__m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_epu32 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu32_pd (__m128i __A)
+{
+ return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_pd (__m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+/* Convert 8 packed signed doublewords in __A to single-precision floats,
+   zeroing result elements whose bit in __U is clear.
+   Fix: the mask parameter was declared __mmask16, but this operation has
+   only eight elements and the builtin takes __mmask8 (cf. the merge-masked
+   variant _mm256_mask_cvtepi32_ps above); declare it __mmask8.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
+{
+  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
+						   (__v8sf)
+						   _mm256_setzero_ps (),
+						   (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+/* Convert 4 packed signed doublewords in __A to single-precision floats,
+   zeroing result elements whose bit in __U is clear.
+   Fix: the mask parameter was declared __mmask16, but this operation has
+   only four elements and the builtin takes __mmask8 (cf. the merge-masked
+   variant _mm_mask_cvtepi32_ps above); declare it __mmask8.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
+{
+  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
+						   (__v4sf)
+						   _mm_setzero_ps (),
+						   (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu32_ps (__m256i __A)
+{
+ return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
+{
+ return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_ps (__m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
+{
+ return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
+{
+ return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
+{
+ return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* Narrow the four doublewords of __A to bytes (truncating pmovdb).
+   Fix: the original passed an uninitialized local as the builtin's
+   pass-through operand -- use of an indeterminate value that warns with
+   -Wmaybe-uninitialized; a zero vector is equivalent under the all-ones
+   mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
+						  (__v16qi)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+/* Narrow the four doubleword elements of __A to bytes and store them to
+   the unaligned location *__P, writing only the lanes whose bit is set
+   in __M.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the eight doublewords of __A to bytes (truncating pmovdb).
+   Fix: replace the uninitialized pass-through local (indeterminate
+   value, -Wmaybe-uninitialized) with a zero vector; the operand is dead
+   under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_epi8 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
+						  (__v16qi)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the four doublewords of __A to bytes with signed saturation
+   (pmovsdb).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi32_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
+						   (__v16qi)
+						   _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the eight doublewords of __A to bytes with signed saturation
+   (pmovsdb).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi32_epi8 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
+						   (__v16qi)
+						   _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the four doublewords of __A to bytes with unsigned saturation
+   (pmovusdb).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi32_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
+						    (__v16qi)
+						    _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the eight doublewords of __A to bytes with unsigned saturation
+   (pmovusdb).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi32_epi8 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
+						    (__v16qi)
+						    _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the four doublewords of __A to words (truncating pmovdw).
+   Fix: replace the uninitialized pass-through local (indeterminate
+   value, -Wmaybe-uninitialized) with a zero vector; the operand is dead
+   under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_epi16 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
+						  (__v8hi)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+/* Narrow the four doubleword elements of __A to words and store them to
+   the unaligned location *__P, writing only the lanes whose bit is set
+   in __M.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the eight doublewords of __A to words (truncating pmovdw).
+   Fix: replace the uninitialized pass-through local (indeterminate
+   value, -Wmaybe-uninitialized) with a zero vector; the operand is dead
+   under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_epi16 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
+						  (__v8hi)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+/* Narrow the 8 doubleword elements of __A to words and store them to
+   *__P, writing only the lanes whose bit is set in __M.
+   Fix: the return type `void' was missing after `extern __inline'
+   (implicit int is invalid C99+); every other *_storeu_* intrinsic in
+   this file is declared `extern __inline void'.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the four doublewords of __A to words with signed saturation
+   (pmovsdw).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi32_epi16 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
+						   (__v8hi)
+						   _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the eight doublewords of __A to words with signed saturation
+   (pmovsdw).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi32_epi16 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
+						   (__v8hi)
+						   _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the four doublewords of __A to words with unsigned saturation
+   (pmovusdw).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi32_epi16 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
+						    (__v8hi)
+						    _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+/* Narrow the 4 doubleword elements of __A to words with unsigned
+   saturation and store them to *__P, writing only the lanes whose bit
+   is set in __M.
+   Fix: the return type `void' was missing after `extern __inline'
+   (implicit int is invalid C99+); every other *_storeu_* intrinsic in
+   this file is declared `extern __inline void'.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the eight doublewords of __A to words with unsigned saturation
+   (pmovusdw).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi32_epi16 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
+						    (__v8hi)
+						    _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the two quadwords of __A to bytes (truncating pmovqb).
+   Fix: replace the uninitialized pass-through local (indeterminate
+   value, -Wmaybe-uninitialized) with a zero vector; the operand is dead
+   under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
+						  (__v16qi)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the four quadwords of __A to bytes (truncating pmovqb).
+   Fix: replace the uninitialized pass-through local (indeterminate
+   value, -Wmaybe-uninitialized) with a zero vector; the operand is dead
+   under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_epi8 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
+						  (__v16qi)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the two quadwords of __A to bytes with signed saturation
+   (pmovsqb).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi64_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
+						   (__v16qi)
+						   _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the four quadwords of __A to bytes with signed saturation
+   (pmovsqb).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi64_epi8 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
+						   (__v16qi)
+						   _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the two quadwords of __A to bytes with unsigned saturation
+   (pmovusqb).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi64_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
+						    (__v16qi)
+						    _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the four quadwords of __A to bytes with unsigned saturation
+   (pmovusqb).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi64_epi8 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
+						    (__v16qi)
+						    _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the two quadwords of __A to words (truncating pmovqw).
+   Fix: replace the uninitialized pass-through local (indeterminate
+   value, -Wmaybe-uninitialized) with a zero vector; the operand is dead
+   under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_epi16 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
+						  (__v8hi)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the four quadwords of __A to words (truncating pmovqw).
+   Fix: replace the uninitialized pass-through local (indeterminate
+   value, -Wmaybe-uninitialized) with a zero vector; the operand is dead
+   under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_epi16 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
+						  (__v8hi)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the two quadwords of __A to words with signed saturation
+   (pmovsqw).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi64_epi16 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
+						   (__v8hi)
+						   _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the four quadwords of __A to words with signed saturation
+   (pmovsqw).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi64_epi16 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
+						   (__v8hi)
+						   _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Narrow the two quadwords of __A to words with unsigned saturation
+   (pmovusqw).  Fix: replace the uninitialized pass-through local
+   (indeterminate value, -Wmaybe-uninitialized) with a zero vector; the
+   operand is dead under the all-ones mask.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi64_epi16 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
+						    (__v8hi)
+						    _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+/* Unsigned-saturating narrow of the two 64-bit lanes of __A to 16 bits,
+   storing only the lanes selected by __M to unaligned memory at __P.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
+}
+
+/* As above, but returning the result; lanes with a clear __M bit are
+   taken from __O.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
+						    (__v8hi) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
+						    (__v8hi) _mm_setzero_si128 (),
+						    __M);
+}
+
+/* Unsigned-saturating narrow of the four 64-bit lanes of __A to 16 bits.
+   Replaces the original's uninitialized __v8hi pass-through operand
+   (undefined behavior in ISO C) with a zero vector; with mask -1 the
+   result is unchanged.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi64_epi16 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
+						    (__v8hi) _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+/* Unsigned-saturating narrow of the four 64-bit lanes of __A to 16 bits,
+   storing only the lanes selected by __M to unaligned memory at __P.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+  __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
+}
+
+/* As above, but returning the result; lanes with a clear __M bit are
+   taken from __O.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
+						    (__v8hi) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
+						    (__v8hi) _mm_setzero_si128 (),
+						    __M);
+}
+
+/* Truncating narrow of the two 64-bit lanes of __A to 32 bits.
+   Replaces the original's uninitialized __v4si pass-through operand
+   (undefined behavior in ISO C) with a zero vector; with mask -1 the
+   result is unchanged.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_epi32 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
+						  (__v4si) _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+/* Truncating narrow of the two 64-bit lanes of __A to 32 bits, storing
+   only the lanes selected by __M to unaligned memory at __P.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
+{
+  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
+}
+
+/* As above, but returning the result; lanes with a clear __M bit are
+   taken from __O.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
+						  (__v4si) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
+						  (__v4si) _mm_setzero_si128 (),
+						  __M);
+}
+
+/* Truncating narrow of the four 64-bit lanes of __A to 32 bits.
+   Replaces the original's uninitialized __v4si pass-through operand
+   (undefined behavior in ISO C) with a zero vector; with mask -1 the
+   result is unchanged.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_epi32 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
+						  (__v4si) _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+/* Truncating narrow of the four 64-bit lanes of __A to 32 bits, storing
+   only the lanes selected by __M to unaligned memory at __P.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
+{
+  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
+}
+
+/* As above, but returning the result; lanes with a clear __M bit are
+   taken from __O.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
+						  (__v4si) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
+						  (__v4si) _mm_setzero_si128 (),
+						  __M);
+}
+
+/* Signed-saturating narrow of the two 64-bit lanes of __A to 32 bits.
+   Replaces the original's uninitialized __v4si pass-through operand
+   (undefined behavior in ISO C) with a zero vector; with mask -1 the
+   result is unchanged.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi64_epi32 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
+						   (__v4si) _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+/* Signed-saturating narrow of the two 64-bit lanes of __A to 32 bits,
+   storing only the lanes selected by __M to unaligned memory at __P.
+   The return type was missing ("extern __inline" with implicit int),
+   which is invalid in C99/C11; add void to match every other
+   *_storeu_* intrinsic in this file.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
+{
+  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
+}
+
+/* Signed-saturating narrow of the two 64-bit lanes of __A to 32 bits;
+   lanes with a clear __M bit are taken from __O.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
+						   (__v4si) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
+						   (__v4si) _mm_setzero_si128 (),
+						   __M);
+}
+
+/* Signed-saturating narrow of the four 64-bit lanes of __A to 32 bits.
+   Replaces the original's uninitialized __v4si pass-through operand
+   (undefined behavior in ISO C) with a zero vector; with mask -1 the
+   result is unchanged.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi64_epi32 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
+						   (__v4si) _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+/* Signed-saturating narrow of the four 64-bit lanes of __A to 32 bits,
+   storing only the lanes selected by __M to unaligned memory at __P.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
+{
+  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
+}
+
+/* As above, but returning the result; lanes with a clear __M bit are
+   taken from __O.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
+						   (__v4si) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
+						   (__v4si) _mm_setzero_si128 (),
+						   __M);
+}
+
+/* Unsigned-saturating narrow of the two 64-bit lanes of __A to 32 bits.
+   Replaces the original's uninitialized __v4si pass-through operand
+   (undefined behavior in ISO C) with a zero vector; with mask -1 the
+   result is unchanged.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi64_epi32 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
+						    (__v4si) _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+/* Unsigned-saturating narrow of the two 64-bit lanes of __A to 32 bits,
+   storing only the lanes selected by __M to unaligned memory at __P.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
+{
+  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
+}
+
+/* As above, but returning the result; lanes with a clear __M bit are
+   taken from __O.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
+						    (__v4si) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
+						    (__v4si) _mm_setzero_si128 (),
+						    __M);
+}
+
+/* Unsigned-saturating narrow of the four 64-bit lanes of __A to 32 bits.
+   Replaces the original's uninitialized __v4si pass-through operand
+   (undefined behavior in ISO C) with a zero vector; with mask -1 the
+   result is unchanged.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi64_epi32 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
+						    (__v4si) _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+/* Unsigned-saturating narrow of the four 64-bit lanes of __A to 32 bits,
+   storing only the lanes selected by __M to unaligned memory at __P.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
+{
+  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
+}
+
+/* As above, but returning the result; lanes with a clear __M bit are
+   taken from __O.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
+						    (__v4si) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
+						    (__v4si) _mm_setzero_si128 (),
+						    __M);
+}
+
+/* Broadcast the low float of __A to all lanes; lanes with a clear __M
+   bit are taken from __O.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
+						      (__v8sf) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
+						      (__v8sf) _mm256_setzero_ps (),
+						      __M);
+}
+
+/* 128-bit masked broadcast of the low float of __A.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
+{
+  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
+						      (__v4sf) __O, __M);
+}
+
+/* As above, zeroing lanes with a clear __M bit.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
+{
+  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
+						      (__v4sf) _mm_setzero_ps (),
+						      __M);
+}
+
+/* Masked broadcast of the low double of __A to all four lanes.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
+{
+  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
+						       (__v4df) __O, __M);
+}
+
+/* As above, zeroing lanes with a clear __M bit.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
+{
+  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
+						       (__v4df) _mm256_setzero_pd (),
+						       __M);
+}
+
+/* Masked broadcast of the low 32-bit element of __A.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
+						       (__v8si) __O, __M);
+}
+
+/* As above, zeroing lanes with a clear __M bit.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
+						       (__v8si) _mm256_setzero_si256 (),
+						       __M);
+}
+
+/* Masked broadcast of the scalar __A (from a GPR) to all 32-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
+							   __M);
+}
+
+/* As above, zeroing lanes with a clear __M bit.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
+							   (__v8si) _mm256_setzero_si256 (),
+							   __M);
+}
+
+/* 128-bit masked broadcast of the low 32-bit element of __A.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
+						       (__v4si) __O, __M);
+}
+
+/* As above, zeroing lanes with a clear __M bit.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
+						       (__v4si) _mm_setzero_si128 (),
+						       __M);
+}
+
+/* 128-bit masked broadcast of the scalar __A to all 32-bit lanes.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
+							   __M);
+}
+
+/* As above, zeroing lanes with a clear __M bit.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
+							   (__v4si) _mm_setzero_si128 (),
+							   __M);
+}
+
+/* Masked broadcast of the low 64-bit element of __A.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
+						       (__v4di) __O, __M);
+}
+
+/* As above, zeroing lanes with a clear __M bit.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
+						       (__v4di) _mm256_setzero_si256 (),
+						       __M);
+}
+
+/* Masked broadcast of the scalar __A to all four 64-bit lanes.
+   TARGET_64BIT is a compiler-internal (i386.h) macro that is never
+   defined when this installed header is preprocessed, so the GPR path
+   was unreachable; test the user-visible __x86_64__ macro instead.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
+							   __M);
+#else
+  return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, (__v4di) __O,
+							   __M);
+#endif
+}
+
+/* Masked-zero broadcast of the scalar __A to all four 64-bit lanes.
+   TARGET_64BIT is compiler-internal and never defined for user code;
+   use the user-visible __x86_64__ macro so the GPR path is actually
+   taken on 64-bit targets.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
+							   (__v4di)
+							   _mm256_setzero_si256 (),
+							   __M);
+#else
+  return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A,
+							   (__v4di)
+							   _mm256_setzero_si256 (),
+							   __M);
+#endif
+}
+
+/* Broadcast the low 64-bit element of __A to both lanes; lanes with a
+   clear __M bit are taken from __O.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
+						       (__v2di) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
+						       (__v2di) _mm_setzero_si128 (),
+						       __M);
+}
+
+/* Masked broadcast of the scalar __A to both 64-bit lanes.
+   TARGET_64BIT is compiler-internal and never defined for user code;
+   use the user-visible __x86_64__ macro so the GPR path is actually
+   taken on 64-bit targets.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
+							   __M);
+#else
+  return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, (__v2di) __O,
+							   __M);
+#endif
+}
+
+/* Masked-zero broadcast of the scalar __A to both 64-bit lanes.
+   TARGET_64BIT is compiler-internal and never defined for user code;
+   use the user-visible __x86_64__ macro so the GPR path is actually
+   taken on 64-bit targets.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
+							   (__v2di)
+							   _mm_setzero_si128 (),
+							   __M);
+#else
+  return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A,
+							   (__v2di)
+							   _mm_setzero_si128 (),
+							   __M);
+#endif
+}
+
+/* Duplicate the 128-bit float vector __A into both halves of a 256-bit
+   result.  Replaces the original's uninitialized __v8sf pass-through
+   operand (undefined behavior in ISO C) with a zero vector; with mask
+   -1 the result is unchanged.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_f32x4 (__m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+							  (__v8sf) _mm256_setzero_ps (),
+							  (__mmask8) -1);
+}
+
+/* Duplicate the 128-bit float vector __A into both halves; lanes with a
+   clear __M bit are taken from __O.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+							  (__v8sf) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+							  (__v8sf) _mm256_setzero_ps (),
+							  __M);
+}
+
+/* Duplicate the 128-bit integer vector __A into both halves of a
+   256-bit result.  Replaces the original's uninitialized __v8si
+   pass-through operand (undefined behavior in ISO C) with a zero
+   vector; with mask -1 the result is unchanged.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_i32x4 (__m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
+							   (__v8si) _mm256_setzero_si256 (),
+							   (__mmask8) -1);
+}
+
+/* Duplicate the 128-bit integer vector __A into both halves; lanes with
+   a clear __M bit are taken from __O.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
+							   (__v8si) __O, __M);
+}
+
+/* As above, but lanes with a clear __M bit are zeroed.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
+							   (__v8si) _mm256_setzero_si256 (),
+							   __M);
+}
+
+/* Masked element-widening conversions.  Each "mask" variant takes its
+   unselected result lanes from __W; each "maskz" variant zeroes them.
+   cvtepi* sign-extends, cvtepu* zero-extends.  */
+
+/* Sign-extend the low bytes of __A to 32-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
+						    (__v8si) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
+						    (__v8si) _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
+						    (__v4si) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
+						    (__v4si) _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* Sign-extend the low bytes of __A to 64-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
+						    (__v4di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
+						    (__v4di) _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
+						    (__v2di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
+						    (__v2di) _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* Sign-extend 16-bit lanes of __A to 32-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
+						    (__v8si) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
+						    (__v8si) _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
+						    (__v4si) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
+						    (__v4si) _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* Sign-extend 16-bit lanes of __A to 64-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
+						    (__v4di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
+						    (__v4di) _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
+						    (__v2di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
+						    (__v2di) _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* Sign-extend 32-bit lanes of __X to 64-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
+{
+  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
+						    (__v4di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
+{
+  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
+						    (__v4di) _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
+						    (__v2di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
+						    (__v2di) _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* Zero-extend the low bytes of __A to 32-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
+						    (__v8si) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
+						    (__v8si) _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
+						    (__v4si) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
+						    (__v4si) _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* Zero-extend the low bytes of __A to 64-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
+						    (__v4di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
+						    (__v4di) _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
+						    (__v2di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
+						    (__v2di) _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* Zero-extend 16-bit lanes of __A to 32-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
+						    (__v8si) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
+						    (__v8si) _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
+						    (__v4si) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
+						    (__v4si) _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* Zero-extend 16-bit lanes of __A to 64-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
+						    (__v4di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
+						    (__v4di) _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
+						    (__v2di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
+						    (__v2di) _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* Zero-extend 32-bit lanes of __X to 64-bit lanes.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
+{
+  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
+						    (__v4di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
+{
+  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
+						    (__v4di) _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
+						    (__v2di) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
+{
+  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
+						    (__v2di) _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* rcp14/rsqrt14: hardware approximations of the reciprocal and the
+   reciprocal square root (vrcp14* / vrsqrt14* — presumably accurate to
+   about 2^-14 relative error per the instruction naming; confirm
+   against the ISA reference).  The plain variants compute all lanes;
+   "mask" variants take unselected lanes from __W, "maskz" variants
+   zero them.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp14_pd (__m256d __A)
+{
+  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
+						   (__v4df) _mm256_setzero_pd (),
+						   (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
+						   (__v4df) __W,
+						   (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
+{
+  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
+						   (__v4df) _mm256_setzero_pd (),
+						   (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_pd (__m128d __A)
+{
+  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
+						   (__v2df) _mm_setzero_pd (),
+						   (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
+						   (__v2df) __W,
+						   (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
+{
+  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
+						   (__v2df) _mm_setzero_pd (),
+						   (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp14_ps (__m256 __A)
+{
+  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
+						  (__v8sf) _mm256_setzero_ps (),
+						  (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
+						  (__v8sf) __W,
+						  (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
+{
+  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
+						  (__v8sf) _mm256_setzero_ps (),
+						  (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_ps (__m128 __A)
+{
+  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
+						  (__v4sf) _mm_setzero_ps (),
+						  (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
+						  (__v4sf) __W,
+						  (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
+						  (__v4sf) _mm_setzero_ps (),
+						  (__mmask8) __U);
+}
+
+/* Reciprocal square-root approximation (vrsqrt14*).  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rsqrt14_pd (__m256d __A)
+{
+  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
+						     (__v4df) _mm256_setzero_pd (),
+						     (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
+						     (__v4df) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
+{
+  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
+						     (__v4df) _mm256_setzero_pd (),
+						     (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_pd (__m128d __A)
+{
+  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
+						     (__v2df) _mm_setzero_pd (),
+						     (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
+						     (__v2df) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
+{
+  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
+						     (__v2df) _mm_setzero_pd (),
+						     (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rsqrt14_ps (__m256 __A)
+{
+  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
+						    (__v8sf) _mm256_setzero_ps (),
+						    (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
+						    (__v8sf) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
+{
+  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
+						    (__v8sf) _mm256_setzero_ps (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_ps (__m128 __A)
+{
+  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
+						    (__v4sf) _mm_setzero_ps (),
+						    (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
+						    (__v4sf) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
+						    (__v4sf) _mm_setzero_ps (),
+						    (__mmask8) __U);
+}
+
+/* Masked VSQRTPS/PD: element-wise square root.  _mask_ forms merge
+   inactive lanes from __W; _maskz_ forms zero them.  The unmasked
+   sqrt intrinsics already exist in avxintrin.h, so only the masked
+   variants are defined here.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Masked VPADDD/VPADDQ/VPSUBD/VPSUBQ: element-wise integer add and
+   subtract on 32- and 64-bit lanes, 256-bit then 128-bit widths.
+   _mask_ forms merge inactive lanes from __W; _maskz_ forms zero
+   them.  Integer wrap-around on overflow (two's complement).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* VGETEXPPS/PD: extract the unbiased exponent of each element as a
+   floating-point value.  Plain, merge-masked and zero-masked triples
+   for ps/pd at 256- and 128-bit widths.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getexp_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getexp_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_pd (__m128d __A)
+{
+ return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* Masked VPSRLD/VPSRLQ: logical right shift of each 32-/64-bit lane
+   of __A by the shift count held in the low quadword of the 128-bit
+   operand __B.  _mask_ forms merge inactive lanes from __W; _maskz_
+   forms zero them.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+/* Masked VPSRLQ, zero-masking form: logically right-shift the two
+   64-bit lanes of __A by the count in the low quadword of __B;
+   lanes whose bit in __U is clear are zeroed.  Uses the standard
+   _mm_setzero_si128 () for the zero source, consistent with every
+   other 128-bit _maskz_ wrapper in this header (the nonstandard
+   _mm_setzero_di helper is not part of the public intrinsics API).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Masked VPANDD: bitwise AND of 32-bit lanes.  _mask_ merges inactive
+   lanes from __W; _maskz_ zeroes them.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+/* VSCALEFPS/PD: element-wise __A * 2^floor(__B).  Plain, merge-masked
+   and zero-masked triples for pd/ps at 256- and 128-bit widths.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_scalef_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_scalef_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Masked FMA (VFMADDxxxPS/PD): __A * __B + __C.  The mask selects the
+   merge source by operand position: _mask_ keeps inactive lanes from
+   __A (first operand), _mask3_ keeps them from __C (third operand),
+   _maskz_ zeroes them.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
+ __mmask8 __U)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
+ __mmask8 __U)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+/* Masked FMS (VFMSUBxxxPS/PD): __A * __B - __C.  _mask_ and _maskz_
+   reuse the fmadd builtins with __C negated; _mask3_ must use the
+   dedicated vfmsub*_mask3 builtin because its merge source is __C
+   itself (un-negated) for inactive lanes.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
+ __mmask8 __U)
+{
+ return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
+ __mmask8 __U)
+{
+ return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U);
+}
+
+/* Masked VFMADDSUBxxxPS/PD: __A * __B with __C alternately subtracted
+   from odd lanes and added to even lanes.  _mask_ merges inactive
+   lanes from __A, _mask3_ from __C, _maskz_ zeroes them.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
+ __mmask8 __U)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
+ __mmask8 __U)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
+ __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+/* Masked VFMSUBADDxxxPS/PD: __A * __B with __C alternately added to
+   odd lanes and subtracted from even lanes.  _mask_ and _maskz_
+   reuse the fmaddsub builtins with __C negated; _mask3_ needs the
+   dedicated vfmsubadd*_mask3 builtin since it merges the un-negated
+   __C into inactive lanes.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
+ __mmask8 __U)
+{
+ return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
+ __mmask8 __U)
+{
+ return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
+ __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U);
+}
+
+/* FNMADD double-precision intrinsics: -(a*b) + c.
+   The mask variant uses the dedicated vfnmadd builtin (pass-through
+   lanes come from __A, which must stay un-negated).  The mask3 and
+   maskz variants reuse the plain vfmadd builtins with __A negated:
+   that is safe because their pass-through lanes come from __C or are
+   zeroed, never from __A.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
+		       __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
+						     (__v4df) __B,
+						     (__v4df) __C,
+						     (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
+			__mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
+						     (__v4df) __B,
+						     (__v4df) __C,
+						     (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
+			__m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
+						     (__v4df) __B,
+						     (__v4df) __C,
+						     (__mmask8) __U);
+}
+
+/* 128-bit counterparts of the three variants above.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
+		    __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
+						     (__v2df) __B,
+						     (__v2df) __C,
+						     (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
+		     __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
+						     (__v2df) __B,
+						     (__v2df) __C,
+						     (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
+		     __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
+						     (__v2df) __B,
+						     (__v2df) __C,
+						     (__mmask8) __U);
+}
+
+/* FNMADD single-precision intrinsics: -(a*b) + c.  Same scheme as the
+   _pd family: mask uses the dedicated vfnmadd builtin, while mask3 and
+   maskz negate __A and reuse vfmadd (their pass-through lanes never
+   read __A, so the negation is invisible to masked-out elements).  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
+		       __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
+						    (__v8sf) __B,
+						    (__v8sf) __C,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
+			__mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
+						    (__v8sf) __B,
+						    (__v8sf) __C,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
+			__m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
+						    (__v8sf) __B,
+						    (__v8sf) __C,
+						    (__mmask8) __U);
+}
+
+/* 128-bit counterparts.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
+						    (__v4sf) __B,
+						    (__v4sf) __C,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
+						    (__v4sf) __B,
+						    (__v4sf) __C,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
+						    (__v4sf) __B,
+						    (__v4sf) __C,
+						    (__mmask8) __U);
+}
+
+/* FNMSUB double-precision intrinsics: -(a*b) - c.
+   mask and mask3 use dedicated vfnmsub builtins because their
+   pass-through lanes come from __A / __C respectively, which must not
+   be negated.  maskz zeroes masked-out lanes, so it can reuse vfmadd
+   with both __A and __C negated: fma (-a, b, -c) == -(a*b) - c.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
+		       __m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
+						     (__v4df) __B,
+						     (__v4df) __C,
+						     (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
+			__mmask8 __U)
+{
+  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
+						      (__v4df) __B,
+						      (__v4df) __C,
+						      (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
+			__m256d __C)
+{
+  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
+						     (__v4df) __B,
+						     -(__v4df) __C,
+						     (__mmask8) __U);
+}
+
+/* 128-bit counterparts.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
+		    __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
+						     (__v2df) __B,
+						     (__v2df) __C,
+						     (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
+		     __mmask8 __U)
+{
+  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
+						      (__v2df) __B,
+						      (__v2df) __C,
+						      (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
+		     __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
+						     (__v2df) __B,
+						     -(__v2df) __C,
+						     (__mmask8) __U);
+}
+
+/* FNMSUB single-precision intrinsics: -(a*b) - c.  Same scheme as the
+   _pd family: dedicated vfnmsub builtins for mask/mask3, and vfmadd
+   with both __A and __C negated for maskz (zeroed lanes never see the
+   negated inputs).  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
+		       __m256 __C)
+{
+  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
+						    (__v8sf) __B,
+						    (__v8sf) __C,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
+			__mmask8 __U)
+{
+  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
+						     (__v8sf) __B,
+						     (__v8sf) __C,
+						     (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
+			__m256 __C)
+{
+  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
+						    (__v8sf) __B,
+						    -(__v8sf) __C,
+						    (__mmask8) __U);
+}
+
+/* 128-bit counterparts.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
+						    (__v4sf) __B,
+						    (__v4sf) __C,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
+						     (__v4sf) __B,
+						     (__v4sf) __C,
+						     (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
+						    (__v4sf) __B,
+						    -(__v4sf) __C,
+						    (__mmask8) __U);
+}
+
+/* Masked 32-bit integer bitwise AND / ANDNOT (VPANDD / VPANDND).
+   mask variants merge masked-out lanes from __W; maskz variants zero
+   them (the setzero call supplies the merge source for the builtin).
+   ANDNOT computes (~__A) & __B, matching the legacy _mm_andnot
+   operand order.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+		    __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
+						 (__v4si) __B,
+						 (__v4si) __W,
+						 (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
+						 (__v4si) __B,
+						 (__v4si)
+						 _mm_setzero_si128 (),
+						 (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+			  __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
+						  (__v8si) __B,
+						  (__v8si) __W,
+						  (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
+						  (__v8si) __B,
+						  (__v8si)
+						  _mm256_setzero_si256 (),
+						  (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+		       __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
+						  (__v4si) __B,
+						  (__v4si) __W,
+						  (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
+						  (__v4si) __B,
+						  (__v4si)
+						  _mm_setzero_si128 (),
+						  (__mmask8) __U);
+}
+
+/* Masked 32-bit integer bitwise OR / XOR (VPORD / VPXORD).
+   mask variants merge masked-out lanes from __W; maskz variants zero
+   them via the setzero merge source.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+		      __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
+						(__v8si) __B,
+						(__v8si) __W,
+						(__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
+						(__v8si) __B,
+						(__v8si)
+						_mm256_setzero_si256 (),
+						(__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
+						(__v4si) __B,
+						(__v4si) __W,
+						(__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
+						(__v4si) __B,
+						(__v4si)
+						_mm_setzero_si128 (),
+						(__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+		       __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
+						 (__v8si) __B,
+						 (__v8si) __W,
+						 (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
+						 (__v8si) __B,
+						 (__v8si)
+						 _mm256_setzero_si256 (),
+						 (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+		    __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
+						 (__v4si) __B,
+						 (__v4si) __W,
+						 (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
+						 (__v4si) __B,
+						 (__v4si)
+						 _mm_setzero_si128 (),
+						 (__mmask8) __U);
+}
+
+/* Masked conversions: double -> float (VCVTPD2PS) and
+   float -> signed int32 (VCVTPS2DQ).  The pd->ps 128-bit form
+   produces only 2 live floats in a __m128 result; its upper lanes
+   follow the instruction's zeroing behavior.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
+{
+  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
+						(__v4sf) __W,
+						(__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
+{
+  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
+						(__v4sf)
+						_mm_setzero_ps (),
+						(__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
+{
+  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
+						   (__v4sf) __W,
+						   (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
+{
+  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
+						   (__v4sf)
+						   _mm_setzero_ps (),
+						   (__mmask8) __U);
+}
+
+/* float -> int32 with rounding per the current MXCSR mode.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
+{
+  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
+						    (__v8si) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
+{
+  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
+						    (__v8si)
+						    _mm256_setzero_si256 (),
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
+						    (__v4si) __W,
+						    (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
+{
+  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
+						    (__v4si)
+						    _mm_setzero_si128 (),
+						    (__mmask8) __U);
+}
+
+/* float -> unsigned int32 conversion (VCVTPS2UDQ) — new with AVX-512,
+   hence even the unmasked form goes through the masked builtin with an
+   all-ones mask ((__mmask8) -1 selects every lane).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_epu32 (__m256 __A)
+{
+  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
+						     (__v8si)
+						     _mm256_setzero_si256 (),
+						     (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
+{
+  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
+						     (__v8si) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
+{
+  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
+						     (__v8si)
+						     _mm256_setzero_si256 (),
+						     (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epu32 (__m128 __A)
+{
+  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
+						     (__v4si)
+						     _mm_setzero_si128 (),
+						     (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
+						     (__v4si) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
+{
+  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
+						     (__v4si)
+						     _mm_setzero_si128 (),
+						     (__mmask8) __U);
+}
+
+/* Masked in-lane duplications: MOVDDUP (duplicate even doubles),
+   MOVSHDUP (duplicate odd floats), MOVSLDUP (duplicate even floats).
+   mask variants merge from __W; maskz variants zero masked lanes.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
+						   (__v4df) __W,
+						   (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
+{
+  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
+						   (__v4df)
+						   _mm256_setzero_pd (),
+						   (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
+						   (__v2df) __W,
+						   (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
+{
+  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
+						   (__v2df)
+						   _mm_setzero_pd (),
+						   (__mmask8) __U);
+}
+
+/* MOVSHDUP: broadcast each odd-indexed float to the pair.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
+						   (__v8sf) __W,
+						   (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
+{
+  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
+						   (__v8sf)
+						   _mm256_setzero_ps (),
+						   (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
+						   (__v4sf) __W,
+						   (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
+						   (__v4sf)
+						   _mm_setzero_ps (),
+						   (__mmask8) __U);
+}
+
+/* MOVSLDUP: broadcast each even-indexed float to the pair.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
+						   (__v8sf) __W,
+						   (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
+{
+  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
+						   (__v8sf)
+						   _mm256_setzero_ps (),
+						   (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
+						   (__v4sf) __W,
+						   (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
+						   (__v4sf)
+						   _mm_setzero_ps (),
+						   (__mmask8) __U);
+}
+
+/* Masked integer unpack/interleave (VPUNPCKH/LDQ, VPUNPCKH/LQDQ):
+   interleave the high (unpackhi) or low (unpacklo) halves of each
+   128-bit lane of __A and __B, element width 32 (epi32) or 64 (epi64).
+   mask variants merge masked-out lanes from __W; maskz variants zero
+   them.
+   NOTE(review): the 128-bit epi64 maskz forms use _mm_setzero_di as
+   the zero source rather than _mm_setzero_si128 — presumably an
+   internal helper defined elsewhere in this header set; confirm it
+   exists (later GCC replaced these uses with _mm_setzero_si128).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+			 __m128i __B)
+{
+  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
+						     (__v4si) __B,
+						     (__v4si) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
+						     (__v4si) __B,
+						     (__v4si)
+						     _mm_setzero_si128 (),
+						     (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+			    __m256i __B)
+{
+  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
+						     (__v8si) __B,
+						     (__v8si) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
+						     (__v8si) __B,
+						     (__v8si)
+						     _mm256_setzero_si256 (),
+						     (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+			 __m128i __B)
+{
+  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
+						      (__v2di) __B,
+						      (__v2di) __W,
+						      (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
+						      (__v2di) __B,
+						      (__v2di)
+						      _mm_setzero_di (),
+						      (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+			    __m256i __B)
+{
+  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
+						      (__v4di) __B,
+						      (__v4di) __W,
+						      (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
+						      (__v4di) __B,
+						      (__v4di)
+						      _mm256_setzero_si256 (),
+						      (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+			 __m128i __B)
+{
+  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
+						     (__v4si) __B,
+						     (__v4si) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
+						     (__v4si) __B,
+						     (__v4si)
+						     _mm_setzero_si128 (),
+						     (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+			    __m256i __B)
+{
+  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
+						     (__v8si) __B,
+						     (__v8si) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
+						     (__v8si) __B,
+						     (__v8si)
+						     _mm256_setzero_si256 (),
+						     (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+			 __m128i __B)
+{
+  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
+						      (__v2di) __B,
+						      (__v2di) __W,
+						      (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
+						      (__v2di) __B,
+						      (__v2di)
+						      _mm_setzero_di (),
+						      (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+			    __m256i __B)
+{
+  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
+						      (__v4di) __B,
+						      (__v4di) __W,
+						      (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
+						      (__v4di) __B,
+						      (__v4di)
+						      _mm256_setzero_si256 (),
+						      (__mmask8) __U);
+}
+
+/* Mask-producing integer compares (VPCMPEQ/VPCMPGT): each result bit
+   is 1 where the per-lane compare holds.  The plain forms pass an
+   all-ones mask ((__mmask8) -1); the mask forms AND the result with
+   __U (lanes masked off in __U yield 0).  cmpgt is a signed
+   greater-than compare.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
+						    (__v4si) __B,
+						    (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
+						    (__v4si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
+						    (__v8si) __B,
+						    (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
+						    (__v8si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
+						    (__v2di) __B,
+						    (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
+						    (__v2di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
+						    (__v4di) __B,
+						    (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
+						    (__v4di) __B, __U);
+}
+
+/* Signed greater-than compares, same masking conventions.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
+						    (__v4si) __B,
+						    (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
+						    (__v4si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
+						    (__v8si) __B,
+						    (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
+						    (__v8si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
+						    (__v2di) __B,
+						    (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
+						    (__v2di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
+						    (__v4di) __B,
+						    (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
+						    (__v4di) __B, __U);
+}
+
+/* Mask-producing bitwise tests (VPTESTM / VPTESTNM):
+   test  sets bit i when (__A[i] & __B[i]) != 0;
+   testn sets bit i when (__A[i] & __B[i]) == 0.
+   Plain forms use an all-ones mask; mask forms AND the result with
+   __U.  Note these builtins take the mask as a third operand rather
+   than a merge vector — there is no merge source for mask results.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_epi32_mask (__m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
+					       (__v4si) __B,
+					       (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
+					       (__v4si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_test_epi32_mask (__m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
+					       (__v8si) __B,
+					       (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
+					       (__v8si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_epi64_mask (__m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
+					       (__v2di) __B,
+					       (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
+					       (__v2di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_test_epi64_mask (__m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
+					       (__v4di) __B,
+					       (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
+					       (__v4di) __B, __U);
+}
+
+/* VPTESTNM: the negated tests — bit set where the AND is zero.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testn_epi32_mask (__m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
+						(__v4si) __B,
+						(__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
+						(__v4si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
+						(__v8si) __B,
+						(__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
+						(__v8si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testn_epi64_mask (__m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
+						(__v2di) __B,
+						(__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
+						(__v2di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
+						(__v4di) __B,
+						(__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
+						(__v4di) __B, __U);
+}
+
+/* Masked compression (VCOMPRESSPD/PS, VPCOMPRESSQ): pack the active
+   (mask-selected) elements of __A contiguously into the low lanes of
+   the result; remaining lanes merge from __W (mask) or are zeroed
+   (maskz).  The compressstoreu variants instead store only the active
+   elements contiguously (unaligned) at *__P; the vector-pointer cast
+   merely matches the builtin's prototype.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
+						      (__v4df) __W,
+						      (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
+{
+  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
+						      (__v4df)
+						      _mm256_setzero_pd (),
+						      (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
+{
+  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
+					  (__v4df) __A,
+					  (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
+						      (__v2df) __W,
+						      (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
+{
+  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
+						      (__v2df)
+						      _mm_setzero_pd (),
+						      (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
+{
+  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
+					  (__v2df) __A,
+					  (__mmask8) __U);
+}
+
+/* Single-precision counterparts.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
+						     (__v8sf) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
+{
+  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
+						     (__v8sf)
+						     _mm256_setzero_ps (),
+						     (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
+{
+  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
+					  (__v8sf) __A,
+					  (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
+						     (__v4sf) __W,
+						     (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
+						     (__v4sf)
+						     _mm_setzero_ps (),
+						     (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
+{
+  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
+					  (__v4sf) __A,
+					  (__mmask8) __U);
+}
+
+/* 64-bit integer compression.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
+						      (__v4di) __W,
+						      (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
+ (__v4di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+/* Zero-masking 128-bit 64-bit-element compression.  Uses
+   _mm_setzero_si128 for the zero source, consistent with the other
+   128-bit integer maskz intrinsics in this file (the non-standard
+   _mm_setzero_di produces the same all-zero vector).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Remaining compression wrappers: 64-bit-element compress-store and
+   the 32-bit-element compress/compress-store family, following the
+   same _mask / _maskz / _compressstoreu pattern as above.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
+ (__v2di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
+ (__v8si) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
+ (__v4si) __A,
+ (__mmask8) __U);
+}
+
+/* VEXPANDPD/PS/Q/D (AVX-512VL) wrappers — the inverse of compression.
+   The *_mask forms merge with __W, *_maskz forms zero unselected
+   elements (note they call the dedicated *_maskz builtins), and the
+   *_expandloadu forms read the packed source from unaligned memory at
+   __P.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
+ (__v4df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
+ (__v2df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8)
+ __U);
+}
+
+/* Integer (64-bit element) expansion.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
+ void const *__P)
+{
+ return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
+ (__v4di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+/* Integer (32-bit element) expansion.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
+ void const *__P)
+{
+ return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
+ (__v8si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
+ (__v4si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+/* VPERMT2/VPERMI2 (two-source permute with vector index) wrappers.
+   The plain and _mask/_maskz forms call the vpermt2var* builtins
+   (index first, merge source is __A); the _mask2 forms call the
+   vpermi2var* builtins, which instead merge into the index operand
+   __I under mask __U.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4df) __A,
+ (__v4df) __B,
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4df) __A,
+ (__v4df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
+ (__v4di) __I
+ /* idx */ ,
+ (__v4df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
+ /* idx */ ,
+ (__v4df) __A,
+ (__v4df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
+{
+ return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8sf) __A,
+ (__v8sf) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8sf) __A,
+ (__v8sf) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
+ (__v8si) __I
+ /* idx */ ,
+ (__v8sf) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
+ /* idx */ ,
+ (__v8sf) __A,
+ (__v8sf) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
+ (__v2di) __I
+ /* idx */ ,
+ (__v2di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
+ /* idx */ ,
+ (__v2di) __A,
+ (__v2di) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
+ (__v4si) __I
+ /* idx */ ,
+ (__v4si) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
+ /* idx */ ,
+ (__v4si) __A,
+ (__v4si) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
+ __mmask8 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
+ (__v4di) __I
+ /* idx */ ,
+ (__v4di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
+ __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
+ /* idx */ ,
+ (__v4di) __A,
+ (__v4di) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
+ __mmask8 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
+ (__v8si) __I
+ /* idx */ ,
+ (__v8si) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
+ __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
+ /* idx */ ,
+ (__v8si) __A,
+ (__v8si) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
+ (__v2di) __I
+ /* idx */ ,
+ (__v2df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
+ /* idx */ ,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
+ __m128 __B)
+{
+ return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
+ __m128 __B)
+{
+ return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
+ (__v4si) __I
+ /* idx */ ,
+ (__v4sf) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
+ __m128 __B)
+{
+ return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
+ /* idx */ ,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8)
+ __U);
+}
+
+/* Unmasked variable arithmetic right shift of 64-bit elements
+   (VPSRAVQ): all-ones write mask, zero merge source.  Uses
+   _mm_setzero_si128 for consistency with the other 128-bit integer
+   intrinsics in this file (identical all-zero value to the
+   non-standard _mm_setzero_di).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srav_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+/* Merge-masking variable arithmetic right shift of 64-bit elements:
+   elements with clear bits in __U are taken from __W.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+/* Zero-masking variable arithmetic right shift of 64-bit elements.
+   Uses _mm_setzero_si128 for the zero merge source, consistent with
+   the sibling maskz intrinsics in this file (same value as the
+   non-standard _mm_setzero_di).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Masked variable left shifts (VPSLLVD/VPSLLVQ): _mask forms merge
+   with __W under __U, _maskz forms zero the unselected elements.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+/* Zero-masking variable left shift of 64-bit elements.  Uses
+   _mm_setzero_si128 for the zero merge source, consistent with the
+   sibling maskz intrinsics in this file (same value as the
+   non-standard _mm_setzero_di).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Masked variable arithmetic (VPSRAVD) and logical (VPSRLVD/VPSRLVQ)
+   right shifts, following the same _mask / _maskz merge pattern.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+/* Zero-masking variable logical right shift of 64-bit elements.
+   Uses _mm_setzero_si128 for the zero merge source, consistent with
+   the sibling maskz intrinsics in this file (same value as the
+   non-standard _mm_setzero_di).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Variable rotate left (VPROLVD) and rotate right (VPRORVD) of 32-bit
+   elements, in plain / _mask / _maskz forms.  The plain forms pass a
+   zero merge source with an all-ones write mask.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rolv_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rolv_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rorv_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rorv_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rolv_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rolv_epi64 (__m128i __A, __m128i __B)
+{
+  /* Rotate each 64-bit element of __A left by the per-element count
+     in __B (VPROLVQ), unmasked.  Use the standard _mm_setzero_si128
+     for the ignored pass-through operand instead of the nonstandard
+     _mm_setzero_di helper, for consistency with the rest of the
+     128-bit integer intrinsics.  */
+  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
+						  (__v2di) __B,
+						  (__v2di)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  /* Rotate each 64-bit element of __A left by the per-element count
+     in __B (VPROLVQ); elements whose bit in __U is clear are zeroed.
+     Use the standard _mm_setzero_si128 instead of the nonstandard
+     _mm_setzero_di helper.  */
+  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
+						  (__v2di) __B,
+						  (__v2di)
+						  _mm_setzero_si128 (),
+						  (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rorv_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rorv_epi64 (__m128i __A, __m128i __B)
+{
+  /* Rotate each 64-bit element of __A right by the per-element count
+     in __B (VPRORVQ), unmasked.  Use the standard _mm_setzero_si128
+     for the ignored pass-through operand instead of the nonstandard
+     _mm_setzero_di helper.  */
+  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
+						  (__v2di) __B,
+						  (__v2di)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  /* Rotate each 64-bit element of __A right by the per-element count
+     in __B (VPRORVQ); elements whose bit in __U is clear are zeroed.
+     Use the standard _mm_setzero_si128 instead of the nonstandard
+     _mm_setzero_di helper.  */
+  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
+						  (__v2di) __B,
+						  (__v2di)
+						  _mm_setzero_si128 (),
+						  (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srav_epi64 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  /* Bitwise AND of 64-bit elements of __A and __B; result elements
+     whose bit in __U is clear are zeroed.  Use the integer zero
+     vector _mm256_setzero_si256: the operand is a __v4di integer
+     vector, so the FP _mm256_setzero_pd used previously was
+     type-inconsistent with every sibling integer intrinsic here.  */
+  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
+						 (__v4di) __B,
+						 (__v4di)
+						 _mm256_setzero_si256 (),
+						 __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  /* Bitwise AND of 64-bit elements of __A and __B; result elements
+     whose bit in __U is clear are zeroed.  Use the integer zero
+     vector _mm_setzero_si128 (the FP _mm_setzero_pd used previously
+     was type-inconsistent for this __v2di operand).  */
+  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
+						 (__v2di) __B,
+						 (__v2di)
+						 _mm_setzero_si128 (),
+						 __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+  /* Bitwise AND-NOT ((~__A) & __B) of 64-bit elements; result
+     elements whose bit in __U is clear are zeroed.  Use the integer
+     zero vector _mm256_setzero_si256 (the FP _mm256_setzero_pd used
+     previously was type-inconsistent for this __v4di operand).  */
+  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
+						  (__v4di) __B,
+						  (__v4di)
+						  _mm256_setzero_si256 (),
+						  __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  /* Bitwise AND-NOT ((~__A) & __B) of 64-bit elements; result
+     elements whose bit in __U is clear are zeroed.  Use the integer
+     zero vector _mm_setzero_si128 (the FP _mm_setzero_pd used
+     previously was type-inconsistent for this __v2di operand).  */
+  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
+						  (__v2di) __B,
+						  (__v2di)
+						  _mm_setzero_si128 (),
+						  __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Per-element maximum of double vectors __A and __B; result elements
+   whose bit in __U is clear are copied from __W (merge masking).  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
+		    __m256d __B)
+{
+  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A, (__v4df) __B,
+						 (__v4df) __W, (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epu64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epu64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi64 (__m128i __A, __m128i __B)
+{
+  /* Signed minimum of 64-bit elements (VPMINSQ), unmasked.  Use the
+     standard _mm_setzero_si128 for the ignored pass-through operand,
+     matching _mm_maskz_min_epi64 which already does so.  */
+  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
+						  (__v2di) __B,
+						  (__v2di)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi64 (__m128i __A, __m128i __B)
+{
+  /* Signed maximum of 64-bit elements (VPMAXSQ), unmasked.  Use the
+     standard _mm_setzero_si128 for the ignored pass-through operand,
+     matching _mm_maskz_max_epi64 which already does so.  */
+  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
+						  (__v2di) __B,
+						  (__v2di)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu64 (__m128i __A, __m128i __B)
+{
+  /* Unsigned maximum of 64-bit elements (VPMAXUQ), unmasked.  Use the
+     standard _mm_setzero_si128 for the ignored pass-through operand,
+     matching _mm_maskz_max_epu64 which already does so.  */
+  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
+						  (__v2di) __B,
+						  (__v2di)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu64 (__m128i __A, __m128i __B)
+{
+  /* Unsigned minimum of 64-bit elements (VPMINUQ), unmasked.  Use the
+     standard _mm_setzero_si128 for the ignored pass-through operand,
+     matching _mm_maskz_min_epu64 which already does so.  */
+  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
+						  (__v2di) __B,
+						  (__v2di)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+#ifndef __AVX512CD__
+#pragma GCC push_options
+#pragma GCC target("avx512vl,avx512cd")
+#define __DISABLE_AVX512VLCD__
+#endif
+
+/* Broadcast the integer value of a mask register into every element of the
+   destination vector (VPBROADCASTMB2Q / VPBROADCASTMW2D).  These require
+   AVX-512CD in addition to AVX-512VL — see the push_options block above.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastmb_epi64 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_broadcastmb128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastmb_epi64 (__mmask8 __A)
+{
+ return (__m256i) __builtin_ia32_broadcastmb256 (__A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastmw_epi32 (__mmask16 __A)
+{
+ return (__m128i) __builtin_ia32_broadcastmw128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastmw_epi32 (__mmask16 __A)
+{
+ return (__m256i) __builtin_ia32_broadcastmw256 (__A);
+}
+
+/* Per-element count of leading zero bits, 256-bit forms (VPLZCNTD/VPLZCNTQ,
+   AVX-512CD+VL).  Plain form uses an all-ones mask; _mask_/_maskz_ merge
+   from __W or zero inactive lanes under __U.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_lzcnt_epi32 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+/* 64-bit element variants.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_lzcnt_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+/* Conflict detection, 256-bit forms (VPCONFLICTQ/VPCONFLICTD,
+   AVX-512CD+VL): each destination element is a bitmask of which
+   lower-indexed source elements hold the same value.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_conflict_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+/* 32-bit element variants.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_conflict_epi32 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+/* Per-element leading-zero count, 128-bit forms (VPLZCNTD/VPLZCNTQ).
+   NOTE(review): the epi64 forms use _mm_setzero_di for the zero source,
+   presumably a DI-mode helper defined elsewhere in this header; later GCC
+   uses _mm_setzero_si128 here — confirm the helper exists.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lzcnt_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lzcnt_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+/* Conflict detection, 128-bit forms (VPCONFLICTQ/VPCONFLICTD): each
+   element reports which lower-indexed elements equal it.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_conflict_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8)
+ __U);
+}
+
+/* 32-bit element variants.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_conflict_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+#ifdef __DISABLE_AVX512VLCD__
+#pragma GCC pop_options
+#endif
+
+/* Masked interleave of the low halves of each 128-bit lane
+   (VUNPCKLPD/VUNPCKLPS).  _mask_ merges from __W, _maskz_ zeroes
+   inactive lanes under __U.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+/* Masked interleave of the high halves of each 128-bit lane
+   (VUNPCKHPD/VUNPCKHPS), merge (_mask_) and zeroing (_maskz_) forms.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Masked half- to single-precision conversion of the low four halfs of
+   __A (VCVTPH2PS, 128-bit destination).  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Zero-masking form of 256-bit VUNPCKLPS (merge form appears earlier).  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Masked half- to single-precision conversion, 256-bit destination
+   (VCVTPH2PS ymm).  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Masked 128-bit VUNPCKLPS, merge and zeroing forms.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Masked arithmetic right shift by the count in the low 64 bits of __B
+   (VPSRAD).  The epi64 forms (VPSRAQ) are new with AVX-512 — there is no
+   pre-AVX-512 64-bit arithmetic shift, hence the unmasked entry points.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sra_epi64 (__m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+/* Masked logical left shift by the count in the low 64 bits of __B
+   (VPSLLD/VPSLLQ), 128- and 256-bit merge and zeroing forms.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+/* Masked variable permutes.  permutexvar (VPERMPS/VPERMPD) selects whole
+   elements across the full vector by the indices in __X; note the builtins
+   take the data operand first and the index operand second, so __Y and __X
+   appear swapped relative to the intrinsic signature.  permutevar
+   (VPERMILPS/VPERMILPD) permutes within each 128-bit lane using per-element
+   control in __C.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
+ __m256 __Y)
+{
+ return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
+ (__v8si) __X,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
+ (__v8si) __X,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
+ (__v4di) __X,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
+ __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
+ (__v4di) __X,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
+ (__v4di) __X,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256i __C)
+{
+ return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
+ (__v4di) __C,
+ (__v4df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
+{
+ return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
+ (__v4di) __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256i __C)
+{
+ return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
+ (__v8si) __C,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
+{
+ return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
+ (__v8si) __C,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128i __C)
+{
+ return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
+ (__v2di) __C,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
+{
+ return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
+ (__v2di) __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128i __C)
+{
+ return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
+ (__v4si) __C,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
+{
+ return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
+ (__v4si) __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Masked low 32-bit multiply (VPMULLD) and variable 64-bit permute
+   (VPERMQ via permvardi256); for the permute the builtin takes data
+   first, indices second.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
+ (__v4di) __X,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+/* Merge-masked low 32-bit multiply, 128-bit form (VPMULLD).  The mask
+   covers only four elements, so the write-mask parameter is __mmask8 —
+   the original declaration used __mmask16, inconsistent with
+   _mm_maskz_mullo_epi32 above and every other 128-bit masked intrinsic
+   in this header (callers passing a wider integer still convert
+   implicitly, so this is backward compatible).  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+/* Masked widening multiplies (VPMULDQ signed / VPMULUDQ unsigned:
+   even-indexed 32-bit elements produce 64-bit products, so __M covers the
+   64-bit result elements) and masked variable 32/64-bit permutes
+   (VPERMD/VPERMQ; builtin operand order is data, then indices).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
+ (__v4di) __X,
+ (__v4di) __W,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
+ (__v8si) __X,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
+ (__v8si) __X,
+ (__v8si) __W,
+ __M);
+}
+
+#ifdef __OPTIMIZE__
+/* Immediate-controlled 64-bit permute (VPERMQ imm8).  Inside the
+   __OPTIMIZE__ guard because __I must fold to a compile-time constant;
+   a macro form is normally provided for -O0 elsewhere in the header.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
+ __m256i __X, const int __I)
+{
+ return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
+ __I,
+ (__v4di) __W,
+ (__mmask8) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
+{
+ return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
+ __I,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __M);
+}
+
+/* Masked immediate shuffles (VSHUFPD/VSHUFPS imm8); __imm must be a
+   compile-time constant (__OPTIMIZE__ guard above).  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B, const int __imm)
+{
+ return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
+ (__v4df) __B, __imm,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
+ (__v4df) __B, __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __imm)
+{
+ return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B, const int __imm)
+{
+ return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
+ (__v8sf) __B, __imm,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
+ (__v8sf) __B, __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Insert a 128-bit vector into the 128-bit half of a 256-bit vector
+   selected by the constant __imm (VINSERTI32X4/VINSERTF32X4), with
+   unmasked, merge-masked and zero-masked forms.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
+ (__v4si) __B,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
+ (__v4si) __B,
+ __imm,
+ (__v8si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
+ (__v4si) __B,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
+{
+ return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
+ (__v4sf) __B,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
+ __m128 __B, const int __imm)
+{
+ return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
+ (__v4sf) __B,
+ __imm,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
+ (__v4sf) __B,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* VEXTRACTI32X4 / VEXTRACTF32X4 (256-bit): extract the 128-bit lane of
+   __A selected by __imm.  Unmasked forms use an all-ones mask; _mask
+   forms blend with __W under __U; _maskz forms zero masked-off
+   elements.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
+ __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
+ __imm,
+ (__v4si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
+ __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+/* Single-precision float variants of the 128-bit lane extract.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf32x4_ps (__m256 __A, const int __imm)
+{
+ return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
+ __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -
+ 1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
+ __imm,
+ (__v4sf) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
+ __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8)
+ __U);
+}
+
+/* VSHUFI64X2 / VSHUFI32X4 / VSHUFF64X2 / VSHUFF32X4 (256-bit): shuffle
+   128-bit lanes of __A and __B as selected by __imm.  Each instruction
+   comes as a triad: unmasked (all-ones mask), _mask (blend with __W
+   under __U) and _maskz (zero where __U is clear).  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
+ (__v4di) __B,
+ __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
+ (__v4di) __B,
+ __imm,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
+ (__v4di) __B,
+ __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+/* 32-bit-element (i32x4) integer lane shuffle.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+/* Double-precision (f64x2) lane shuffle.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
+{
+ return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
+ (__v4df) __B,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B, const int __imm)
+{
+ return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
+ (__v4df) __B,
+ __imm,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
+ (__v4df) __B,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* Single-precision (f32x4) lane shuffle.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
+{
+ return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B, const int __imm)
+{
+ return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* VFIXUPIMM{PD,PS} (256- and 128-bit): wrap the vfixupimm built-ins,
+   which combine __A/__B with the table __C under control of __imm.
+   Note the unmasked forms call distinct no-mask built-ins
+   (fixupimm{pd,ps}{256,128}), while _mask/_maskz forms pass __U to the
+   _mask/_maskz built-in variants.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_fixupimmpd256 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4di) __C, __imm);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256i __C, const int __imm)
+{
+ return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256i __C, const int __imm)
+{
+ return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ (__v4di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_fixupimmps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8si) __C, __imm);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
+ __m256i __C, const int __imm)
+{
+ return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8si) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256i __C, const int __imm)
+{
+ return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8si) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+/* 128-bit variants.  */
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmpd128 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmps128 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+/* Masked VPSRLD / VPSRLQ: logical right shift of each 32-/64-bit
+   element of __A by the immediate __imm.  _mask forms blend with __W
+   under __U; _maskz forms zero masked-off elements.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* 64-bit-element variants.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* VPTERNLOG{Q,D} (256- and 128-bit): bitwise ternary logic of __A, __B
+   and __C; `imm' selects the 3-input truth table.  Unmasked forms pass
+   an all-ones mask to the _mask built-in; _maskz forms call the
+   dedicated _maskz built-in.  NOTE(review): the immediate parameter is
+   spelled `imm' here, not `__imm' as elsewhere in this header.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
+ const int imm)
+{
+ return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __C, imm,
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
+ __m256i __B, __m256i __C,
+ const int imm)
+{
+ return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __C, imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
+ __m256i __B, __m256i __C,
+ const int imm)
+{
+ return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __C,
+ imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
+ const int imm)
+{
+ return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __C, imm,
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
+ __m256i __B, __m256i __C,
+ const int imm)
+{
+ return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __C, imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
+ __m256i __B, __m256i __C,
+ const int imm)
+{
+ return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __C,
+ imm,
+ (__mmask8) __U);
+}
+
+/* 128-bit variants.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
+ const int imm)
+{
+ return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __C, imm,
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
+ __m128i __B, __m128i __C, const int imm)
+{
+ return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __C, imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
+ __m128i __B, __m128i __C, const int imm)
+{
+ return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __C,
+ imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
+ const int imm)
+{
+ return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __C, imm,
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
+ __m128i __B, __m128i __C, const int imm)
+{
+ return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __C, imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
+ __m128i __B, __m128i __C, const int imm)
+{
+ return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __C,
+ imm,
+ (__mmask8) __U);
+}
+
+/* VRNDSCALE{PS,PD} (256- and 128-bit): round each element of __A under
+   control of __imm.  Unmasked forms pass an all-ones mask; _mask forms
+   blend with __W under __U; _maskz forms zero masked-off elements.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_roundscale_ps (__m256 __A, const int __imm)
+{
+ return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
+ __imm,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
+{
+ return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_roundscale_pd (__m256d __A, const int __imm)
+{
+ return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
+ __imm,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
+{
+ return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* 128-bit variants.  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_ps (__m128 __A, const int __imm)
+{
+ return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
+ __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
+ __imm,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
+{
+ return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
+ __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_pd (__m128d __A, const int __imm)
+{
+ return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
+ __imm,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
+{
+ return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* VGETMANT{PS,PD} (256- and 128-bit): extract the mantissa of each
+   element of __A.  The built-in takes a single immediate, so the sign
+   control __C and interval selector __B are packed as
+   (__C << 2) | __B.  Unmasked forms pass an all-ones mask; _mask forms
+   blend with __W under __U; _maskz forms zero masked-off elements.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
+ (__C << 2) | __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
+ (__C << 2) | __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
+ (__C << 2) | __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
+ (__C << 2) | __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
+ (__C << 2) | __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
+ (__C << 2) | __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Double-precision variants.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
+ (__C << 2) | __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
+ (__C << 2) | __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
+ (__C << 2) | __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
+ (__C << 2) | __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
+ (__C << 2) | __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
+ (__C << 2) | __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+/* Masked VGATHER (VL forms): load elements from __addr at positions
+   given by __index * __scale, under write-mask __mask; __v1_old
+   supplies the values carried through for masked-off elements (per the
+   AVX-512 gather convention — the gather3* built-in takes it as the
+   first operand).  i32/i64 selects the index element width; ps/pd and
+   epi32/epi64 select the data element type.  */
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
+ __m256i __index, float const *__addr,
+ int __scale)
+{
+ return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
+ __addr,
+ (__v8si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
+ __m128i __index, float const *__addr,
+ int __scale)
+{
+ return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
+ __m128i __index, double const *__addr,
+ int __scale)
+{
+ return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
+ __m128i __index, double const *__addr,
+ int __scale)
+{
+ return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+/* 64-bit-index float/double gathers.  Note the 256-bit i64gather_ps
+   returns only a __m128 (4 floats from 4 qword indices).  */
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
+ __m256i __index, float const *__addr,
+ int __scale)
+{
+ return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
+ __addr,
+ (__v4di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
+ __m128i __index, float const *__addr,
+ int __scale)
+{
+ return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
+ __addr,
+ (__v2di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
+ __m256i __index, double const *__addr,
+ int __scale)
+{
+ return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
+ __addr,
+ (__v4di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
+ __m128i __index, double const *__addr,
+ int __scale)
+{
+ return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
+ __addr,
+ (__v2di) __index,
+ __mask, __scale);
+}
+
+/* Integer-element gathers.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
+ __m256i __index, int const *__addr,
+ int __scale)
+{
+ return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
+ __addr,
+ (__v8si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
+ __m128i __index, int const *__addr,
+ int __scale)
+{
+ return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
+ __m128i __index, long long const *__addr,
+ int __scale)
+{
+ return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
+ __m128i __index, long long const *__addr,
+ int __scale)
+{
+ return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
+ __m256i __index, int const *__addr,
+ int __scale)
+{
+ return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
+ __addr,
+ (__v4di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
+ __m128i __index, int const *__addr,
+ int __scale)
+{
+ return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
+ __addr,
+ (__v2di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
+ __m256i __index, long long const *__addr,
+ int __scale)
+{
+ return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
+ __addr,
+ (__v4di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
+ __m128i __index, long long const *__addr,
+ int __scale)
+{
+ return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
+ __addr,
+ (__v2di) __index,
+ __mask, __scale);
+}
+
+/* VSCATTER (VL forms, float/double): store elements of __v1 to __addr
+   at positions given by __index * __scale.  The unmasked forms pass a
+   constant all-ones mask (0xFF); _mask forms store only elements whose
+   __mask bit is set.  i32/i64 selects the index element width.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32scatter_ps (float *__addr, __m256i __index,
+ __m256 __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
+ (__v8si) __index, (__v8sf) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
+ __m256i __index, __m256 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
+ (__v8sf) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32scatter_ps (float *__addr, __m128i __index, __m128 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v4sf) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
+ __m128i __index, __m128 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
+ (__v4sf) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32scatter_pd (double *__addr, __m128i __index,
+ __m256d __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v4df) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
+ __m128i __index, __m256d __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
+ (__v4df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32scatter_pd (double *__addr, __m128i __index,
+ __m128d __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v2df) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
+ __m128i __index, __m128d __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
+ (__v2df) __v1, __scale);
+}
+
+/* 64-bit-index scatters.  */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64scatter_ps (float *__addr, __m256i __index,
+ __m128 __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
+ (__v4di) __index, (__v4sf) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
+ __m256i __index, __m128 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
+ (__v4sf) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64scatter_ps (float *__addr, __m128i __index, __m128 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
+ (__v2di) __index, (__v4sf) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
+ __m128i __index, __m128 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
+ (__v4sf) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64scatter_pd (double *__addr, __m256i __index,
+ __m256d __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
+ (__v4di) __index, (__v4df) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
+ __m256i __index, __m256d __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
+ (__v4df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64scatter_pd (double *__addr, __m128i __index,
+ __m128d __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
+ (__v2di) __index, (__v2df) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
+ __m128i __index, __m128d __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
+ (__v2df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32scatter_epi32 (int *__addr, __m256i __index,
+ __m256i __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
+ (__v8si) __index, (__v8si) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
+ __m256i __index, __m256i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
+ (__v8si) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32scatter_epi32 (int *__addr, __m128i __index,
+ __m128i __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v4si) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
+ __m128i __index, __m128i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
+ (__v4si) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32scatter_epi64 (long long *__addr, __m128i __index,
+ __m256i __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v4di) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
+ __m128i __index, __m256i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
+ (__v4di) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32scatter_epi64 (long long *__addr, __m128i __index,
+ __m128i __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v2di) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
+ __m128i __index, __m128i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
+ (__v2di) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64scatter_epi32 (int *__addr, __m256i __index,
+ __m128i __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
+ (__v4di) __index, (__v4si) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
+ __m256i __index, __m128i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
+ (__v4si) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64scatter_epi32 (int *__addr, __m128i __index,
+ __m128i __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
+ (__v2di) __index, (__v4si) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
+ __m128i __index, __m128i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
+ (__v4si) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64scatter_epi64 (long long *__addr, __m256i __index,
+ __m256i __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
+ (__v4di) __index, (__v4di) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
+ __m256i __index, __m256i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
+ (__v4di) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64scatter_epi64 (long long *__addr, __m128i __index,
+ __m128i __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
+ (__v2di) __index, (__v2di) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
+ __m128i __index, __m128i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
+ (__v2di) __v1, __scale);
+}
+
+/* Shuffle the 32-bit elements of __A according to the immediate
+ __mask, merge-masked: result lanes whose __U bit is clear are
+ taken from __W instead.  */
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rol_epi32 (__m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __B)
+{
+ return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rol_epi32 (__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __B)
+{
+ return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ror_epi32 (__m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __B)
+{
+ return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ror_epi32 (__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __B)
+{
+ return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rol_epi64 (__m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __B)
+{
+ return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+/* Rotate each 64-bit element of __A left by __B bits.  The pass-through
+ operand is all-zero and the mask is all-ones, so every lane is written.
+ Use the standard SSE2 _mm_setzero_si128 rather than the nonstandard
+ internal helper _mm_setzero_di; both yield a zero __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rol_epi64 (__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __B)
+{
+ return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+/* Rotate each 64-bit element of __A left by __B bits, zero-masked:
+ lanes whose __U bit is clear are set to zero.  Use the standard
+ _mm_setzero_si128 instead of the nonstandard _mm_setzero_di.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ror_epi64 (__m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __B)
+{
+ return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+/* Rotate each 64-bit element of __A right by __B bits; all-ones mask
+ writes every lane.  Use the standard _mm_setzero_si128 instead of
+ the nonstandard _mm_setzero_di for the zero pass-through operand.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ror_epi64 (__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __B)
+{
+ return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+/* Rotate each 64-bit element of __A right by __B bits, zero-masked:
+ lanes whose __U bit is clear become zero.  Use the standard
+ _mm_setzero_si128 instead of the nonstandard _mm_setzero_di.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
+ (__v4si) __B, __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
+ (__v4si) __B, __imm,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
+ (__v4si) __B, __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+/* Concatenate __A:__B and extract two 64-bit elements starting at
+ offset __imm (valignq).  All-ones mask writes both lanes.  Use the
+ standard _mm_setzero_si128 rather than the nonstandard
+ _mm_setzero_di for the zero pass-through operand.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
+ (__v2di) __B, __imm,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
+ (__v2di) __B, __imm,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+/* Concatenate __A:__B and extract two 64-bit elements starting at
+ offset __imm (valignq), zero-masked by __U.  Use the standard
+ _mm_setzero_si128 instead of the nonstandard _mm_setzero_di.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
+ (__v2di) __B, __imm,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
+ (__v8si) __B, __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
+ (__v8si) __B, __imm,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
+ (__v8si) __B, __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
+ (__v4di) __B, __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
+ (__v4di) __B, __imm,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
+ (__v4di) __B, __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
+ const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+/* Convert four single-precision elements of __A to half precision
+ with rounding control __I, zero-masked by __U.  Use the standard
+ _mm_setzero_si128 instead of the nonstandard internal helper
+ _mm_setzero_hi; both yield a zero __m128i.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
+ const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+/* Convert eight single-precision elements of __A to half precision
+ with rounding control __I, zero-masked by __U.  Use the standard
+ _mm_setzero_si128 instead of the nonstandard _mm_setzero_hi.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srai_epi64 (__m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+/* Arithmetic right shift of each 64-bit element of __A by __imm bits;
+ all-ones mask writes both lanes.  Use the standard _mm_setzero_si128
+ for the zero pass-through operand, consistent with
+ _mm_maskz_srai_epi64 below, instead of the nonstandard
+ _mm_setzero_di.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_epi64 (__m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+/* Logical left shift of each 64-bit element of __A by __B bits,
+ zero-masked by __U.  Use the standard _mm_setzero_si128 instead of
+ the nonstandard _mm_setzero_di.  */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ int __B)
+{
+ return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
+{
+ return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ int __B)
+{
+ return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
+{
+ return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
+{
+ return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
+ const int __C)
+{
+ return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
+{
+ return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
+ const int __C)
+{
+ return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
+{
+ return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
+ const int __C)
+{
+ return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
+{
+ return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
+ const int __C)
+{
+ return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
+{
+ return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+/* Per-element select between __A and __W under mask __U (vblendmpd):
+ lanes whose __U bit is set come from __W, the rest from __A.  */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
+{
+ return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
+{
+ return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
+{
+ return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
+{
+ return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
+{
+ return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
+{
+ return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+ return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+ return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+/* Compare the 64-bit signed elements of __X and __Y using comparison
+ predicate __P and return the per-lane results as a bitmask; the
+ all-ones input mask leaves no lane suppressed.  */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, __P,
+ (__mmask8) -1);
+}
+
+/* AVX-512VL comparisons on 256-bit vectors with an explicit predicate:
+ __P is forwarded unchanged as the predicate immediate of the
+ vpcmp[u]/vcmpp builtin, and the per-element results are returned
+ as a bitmask. (__mmask8) -1 selects all elements. */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
+ (__v4df) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
+ (__v8sf) __Y, __P,
+ (__mmask8) -1);
+}
+
+/* Mask variants: __U is forwarded as the builtin's mask operand in
+ place of the all-ones mask above. */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
+ (__v4df) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
+ (__v8sf) __Y, __P,
+ (__mmask8) __U);
+}
+
+/* 128-bit counterparts of the explicit-predicate comparisons above:
+ __P is the predicate immediate handed to the 128-bit vpcmp[u]/vcmpp
+ builtins; (__mmask8) -1 selects all elements. */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) -1);
+}
+
+/* Mask variants: __U is forwarded as the builtin's mask operand. */
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) __U);
+}
+
+/* Permute the doubles of __X by the immediate __M (vpermpd); unmasked
+ form: the pass-through operand is undefined and the mask is all-ones. */
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex_pd (__m256d __X, const int __M)
+{
+ return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
+ (__v4df)
+ _mm256_undefined_pd (),
+ (__mmask8) -1);
+}
+
+/* Fixed-predicate comparisons on 256-bit vectors. The third builtin
+ argument is the predicate immediate; as the intrinsic names show,
+ 1 = lt, 2 = le, 4 = neq, 5 = ge. All elements are selected
+ ((__mmask8) - 1). */
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 4,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 1,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 5,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 2,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 4,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 1,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 5,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 2,
+ (__mmask8) - 1);
+}
+
+/* Signed element variants (cmpd/cmpq rather than ucmpd/ucmpq). */
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 4,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 1,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 5,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 2,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 4,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 1,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 5,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 2,
+ (__mmask8) - 1);
+}
+
+/* 128-bit fixed-predicate comparisons; same predicate encoding as the
+ 256-bit group above (1 = lt, 2 = le, 4 = neq, 5 = ge). */
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 4,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 1,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 5,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 2,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 4,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 1,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 5,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 2,
+ (__mmask8) - 1);
+}
+
+/* Signed element variants. */
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 4,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 1,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 5,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 2,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 4,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 1,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 5,
+ (__mmask8) - 1);
+}
+
+extern __inline __mmask8
+ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 2,
+ (__mmask8) - 1);
+}
+
+#else
+/* Macro forms of the permutex/insert/extract intrinsics. These are in
+ the #else branch of a conditional whose #if is outside this hunk —
+ presumably !__OPTIMIZE__, where the immediate arguments must reach
+ the builtins as literals rather than through inline functions;
+ confirm against the full header. */
+#define _mm256_permutex_pd(X, M) \
+ ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
+ (__v4df)(__m256d)_mm256_undefined_pd(),\
+ (__mmask8)-1))
+
+#define _mm256_maskz_permutex_epi64(M, X, I) \
+ ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
+ (int)(I), \
+ (__v4di)(__m256i) \
+ (_mm256_setzero_si256()),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_permutex_epi64(W, M, X, I) \
+ ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
+ (int)(I), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(M)))
+
+#define _mm256_insertf32x4(X, Y, C) \
+ ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (__v4sf)(__m128) (Y), (int) (C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
+ ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (__v4sf)(__m128) (Y), (int) (C), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_insertf32x4(U, X, Y, C) \
+ ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (__v4sf)(__m128) (Y), (int) (C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm256_inserti32x4(X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (__v4si)(__m128i) (Y), (int) (C), \
+ (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (__v4si)(__m128i) (Y), (int) (C), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_inserti32x4(U, X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (__v4si)(__m128i) (Y), (int) (C), \
+ (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)(U)))
+
+#define _mm256_extractf32x4_ps(X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (int) (C), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (int) (C), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_extractf32x4_ps(U, X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (int) (C), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm256_extracti32x4_epi32(X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
+
+#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (int) (C), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
+
+/* Macro forms of the 128-bit-lane shuffles (vshufi64x2/vshufi32x4/
+ vshuff64x2/vshuff32x4) and of the masked shufpd/shufps variants.
+ W is the pass-through vector, U the write mask; maskz forms use a
+ zero vector as pass-through. */
+#define _mm256_shuffle_i64x2(X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask8)-1))
+
+#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), \
+ (__v4di)(__m256i)(W),\
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask8)(U)))
+
+#define _mm256_shuffle_i32x4(X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), \
+ (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), \
+ (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)(U)))
+
+#define _mm256_shuffle_f64x2(X, Y, C) \
+ ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
+ ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(C), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
+ ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm256_shuffle_f32x4(X, Y, C) \
+ ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
+ ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(C), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
+ ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
+ ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_pd(U, A, B, C) \
+ ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(),\
+ (__mmask8)(U)))
+
+#define _mm_mask_shuffle_pd(W, U, A, B, C) \
+ ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_shuffle_pd(U, A, B, C) \
+ ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
+ ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_ps(U, A, B, C) \
+ ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm_mask_shuffle_ps(W, U, A, B, C) \
+ ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_shuffle_ps(U, A, B, C) \
+ ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U)))
+
+/* Macro forms of vfixupimmpd/vfixupimmps: X is source/destination,
+ Y the second source, Z the integer table operand, C the immediate.
+ _mask forms merge under U into X; _maskz forms use the *_maskz
+ builtin instead. */
+#define _mm256_fixupimm_pd(X, Y, Z, C) \
+ ((__m256d)__builtin_ia32_fixupimmpd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(Z), (int)(C)))
+
+#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
+ ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(Z), (int)(C), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
+ ((__m256d)__builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(Z), (int)(C),\
+ (__mmask8)(U)))
+
+#define _mm256_fixupimm_ps(X, Y, Z, C) \
+ ((__m256)__builtin_ia32_fixupimmps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(Z), (int)(C)))
+
+#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
+ ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(Z), (int)(C), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
+ ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(Z), (int)(C),\
+ (__mmask8)(U)))
+
+#define _mm_fixupimm_pd(X, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmpd128 ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(Z), (int)(C)))
+
+#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(Z), (int)(C),\
+ (__mmask8)(U)))
+
+#define _mm_fixupimm_ps(X, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmps128 ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(Z), (int)(C)))
+
+#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(Z), (int)(C),\
+ (__mmask8)(U)))
+
+#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(Z), (int)(C),\
+ (__mmask8)(U)))
+
+/* Macro forms of the masked immediate shifts (vpsrld/vpsrlq and
+ vpslld/vpsllq). W is the pass-through vector, U the write mask;
+ the maskz forms pass a zero vector instead of W. */
+#define _mm256_mask_srli_epi32(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
+ (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_srli_epi32(U, A, B) \
+ ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
+ (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
+
+#define _mm_mask_srli_epi32(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
+ (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srli_epi32(U, A, B) \
+ ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
+ (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
+
+#define _mm256_mask_srli_epi64(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
+ (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_srli_epi64(U, A, B) \
+ ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
+ (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
+
+#define _mm_mask_srli_epi64(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
+ (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srli_epi64(U, A, B) \
+ ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
+ (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+
+#define _mm256_mask_slli_epi32(W, U, X, C) \
+ ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
+ (__v8si)(__m256i)(W),\
+ (__mmask8)(U)))
+
+#define _mm256_maskz_slli_epi32(U, X, C) \
+ ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
+ (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__mmask8)(U)))
+
+#define _mm256_mask_slli_epi64(W, U, X, C) \
+ ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
+ (__v4di)(__m256i)(W),\
+ (__mmask8)(U)))
+
+#define _mm256_maskz_slli_epi64(U, X, C) \
+ ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm_mask_slli_epi32(W, U, X, C) \
+ ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
+ (__v4si)(__m128i)(W),\
+ (__mmask8)(U)))
+
+#define _mm_maskz_slli_epi32(U, X, C) \
+ ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
+ (__v4si)(__m128i)_mm_setzero_si128 (),\
+ (__mmask8)(U)))
+
+#define _mm_mask_slli_epi64(W, U, X, C) \
+ ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
+ (__v2di)(__m128i)(W),\
+ (__mmask8)(U)))
+
+/* NOTE(review): was _mm_setzero_di(); replaced with the standard
+ _mm_setzero_si128() for consistency with _mm_maskz_srli_epi64 and
+ _mm_maskz_slli_epi32 above — upstream later removed _mm_setzero_di
+ entirely in favor of _mm_setzero_si128. */
+#define _mm_maskz_slli_epi64(U, X, C) \
+ ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
+ (__v2di)(__m128i)_mm_setzero_si128 (),\
+ (__mmask8)(U)))
+
+/* Macro forms of vpternlogq/vpternlogd: I is the 8-bit truth-table
+ immediate over operands A, B, C. The plain forms pass an all-ones
+ mask; _mask forms pass U; _maskz forms call the *_maskz builtin. */
+#define _mm256_ternarylogic_epi64(A, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))
+
+#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm256_ternarylogic_epi32(A, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))
+
+#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm_ternarylogic_epi64(A, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))
+
+#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm_ternarylogic_epi32(A, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))
+
+#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
+
+/* Macro forms of vrndscaleps/vrndscalepd: B is the rounding-control
+ immediate. Plain forms use a zero pass-through with an all-ones
+ mask; _mask forms merge into W under U; _maskz forms zero under U. */
+#define _mm256_roundscale_ps(A, B) \
+ ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)-1))
+
+#define _mm256_mask_roundscale_ps(W, U, A, B) \
+ ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_roundscale_ps(U, A, B) \
+ ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)(U)))
+
+#define _mm256_roundscale_pd(A, B) \
+ ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)-1))
+
+#define _mm256_mask_roundscale_pd(W, U, A, B) \
+ ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_roundscale_pd(U, A, B) \
+ ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
+
+#define _mm_roundscale_ps(A, B) \
+ ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)-1))
+
+#define _mm_mask_roundscale_ps(W, U, A, B) \
+ ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
+
+#define _mm_maskz_roundscale_ps(U, A, B) \
+ ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))
+
+#define _mm_roundscale_pd(A, B) \
+ ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)-1))
+
+#define _mm_mask_roundscale_pd(W, U, A, B) \
+ ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_roundscale_pd(U, A, B) \
+ ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U)))
+
+/* Macro forms of vgetmantps/vgetmantpd. The builtin's immediate packs
+ the two enum arguments as (C << 2) | B — B in bits 1:0, C in bits
+ 3:2 — matching the instruction's imm8 layout. */
+#define _mm256_getmant_ps(X, B, C) \
+ ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_getmant_ps(W, U, X, B, C) \
+ ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_getmant_ps(U, X, B, C) \
+ ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm_getmant_ps(X, B, C) \
+ ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)-1))
+
+#define _mm_mask_getmant_ps(W, U, X, B, C) \
+ ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_getmant_ps(U, X, B, C) \
+ ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm256_getmant_pd(X, B, C) \
+ ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_getmant_pd(W, U, X, B, C) \
+ ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_getmant_pd(U, X, B, C) \
+ ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm_getmant_pd(X, B, C) \
+ ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)-1))
+
+#define _mm_mask_getmant_pd(W, U, X, B, C) \
+ ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_getmant_pd(U, X, B, C) \
+ ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)(U)))
+
+/* Macro forms of the masked gathers (vgatherdps/vgatherdpd/
+ vgatherqps/vgatherqpd and the epi32 forms): V1OLD supplies the
+ pass-through elements, MASK selects lanes, INDEX holds the offsets
+ scaled by SCALE from base ADDR. Note the macro arguments are not
+ parenthesized in the expansions, unlike the other macros in this
+ header. */
+#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)V1OLD, \
+ (float const *)ADDR, \
+ (__v8si)(__m256i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)V1OLD, \
+ (float const *)ADDR, \
+ (__v4si)(__m128i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)V1OLD, \
+ (double const *)ADDR, \
+ (__v4si)(__m128i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)V1OLD, \
+ (double const *)ADDR, \
+ (__v4si)(__m128i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)V1OLD, \
+ (float const *)ADDR, \
+ (__v4di)(__m256i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)V1OLD, \
+ (float const *)ADDR, \
+ (__v2di)(__m128i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)V1OLD, \
+ (double const *)ADDR, \
+ (__v4di)(__m256i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)V1OLD, \
+ (double const *)ADDR, \
+ (__v2di)(__m128i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)V1OLD, \
+ (int const *)ADDR, \
+ (__v8si)(__m256i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)V1OLD, \
+ (int const *)ADDR, \
+ (__v4si)(__m128i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)V1OLD, \
+ (long long const *)ADDR, \
+ (__v4si)(__m128i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)V1OLD, \
+ (long long const *)ADDR, \
+ (__v4si)(__m128i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)V1OLD, \
+ (int const *)ADDR, \
+ (__v4di)(__m256i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)V1OLD, \
+ (int const *)ADDR, \
+ (__v2di)(__m128i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)V1OLD, \
+ (long long const *)ADDR, \
+ (__v4di)(__m256i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)V1OLD, \
+ (long long const *)ADDR, \
+ (__v2di)(__m128i)INDEX, \
+ (__mmask8)MASK, (int)SCALE)
+
+#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8sf ((float *)ADDR, (__mmask8)0xFF, \
+ (__v8si)(__m256i)INDEX, \
+ (__v8sf)(__m256)V1, (int)SCALE)
+
+#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8sf ((float *)ADDR, (__mmask8)MASK, \
+ (__v8si)(__m256i)INDEX, \
+ (__v8sf)(__m256)V1, (int)SCALE)
+
+#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4sf ((float *)ADDR, (__mmask8)0xFF, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4sf)(__m128)V1, (int)SCALE)
+
+#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4sf ((float *)ADDR, (__mmask8)MASK, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4sf)(__m128)V1, (int)SCALE)
+
+#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4df ((double *)ADDR, (__mmask8)0xFF, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4df)(__m256d)V1, (int)SCALE)
+
+#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4df ((double *)ADDR, (__mmask8)MASK, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4df)(__m256d)V1, (int)SCALE)
+
+#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv2df ((double *)ADDR, (__mmask8)0xFF, \
+ (__v4si)(__m128i)INDEX, \
+ (__v2df)(__m128d)V1, (int)SCALE)
+
+#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv2df ((double *)ADDR, (__mmask8)MASK, \
+ (__v4si)(__m128i)INDEX, \
+ (__v2df)(__m128d)V1, (int)SCALE)
+
+#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8sf ((float *)ADDR, (__mmask8)0xFF, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4sf)(__m128)V1, (int)SCALE)
+
+#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8sf ((float *)ADDR, (__mmask8)MASK, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4sf)(__m128)V1, (int)SCALE)
+
+#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4sf ((float *)ADDR, (__mmask8)0xFF, \
+ (__v2di)(__m128i)INDEX, \
+ (__v4sf)(__m128)V1, (int)SCALE)
+
+#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4sf ((float *)ADDR, (__mmask8)MASK, \
+ (__v2di)(__m128i)INDEX, \
+ (__v4sf)(__m128)V1, (int)SCALE)
+
+#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4df ((double *)ADDR, (__mmask8)0xFF, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4df)(__m256d)V1, (int)SCALE)
+
+#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4df ((double *)ADDR, (__mmask8)MASK, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4df)(__m256d)V1, (int)SCALE)
+
+#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv2df ((double *)ADDR, (__mmask8)0xFF, \
+ (__v2di)(__m128i)INDEX, \
+ (__v2df)(__m128d)V1, (int)SCALE)
+
+#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv2df ((double *)ADDR, (__mmask8)MASK, \
+ (__v2di)(__m128i)INDEX, \
+ (__v2df)(__m128d)V1, (int)SCALE)
+
+#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)0xFF, \
+ (__v8si)(__m256i)INDEX, \
+ (__v8si)(__m256i)V1, (int)SCALE)
+
+#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8si ((int *)ADDR, (__mmask8)MASK, \
+ (__v8si)(__m256i)INDEX, \
+ (__v8si)(__m256i)V1, (int)SCALE)
+
+#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)0xFF, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4si)(__m128i)V1, (int)SCALE)
+
+#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4si ((int *)ADDR, (__mmask8)MASK, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4si)(__m128i)V1, (int)SCALE)
+
+#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4di ((long long *)ADDR, (__mmask8)0xFF, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4di)(__m256i)V1, (int)SCALE)
+
+#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4di ((long long *)ADDR, (__mmask8)MASK, \
+ (__v4si)(__m128i)INDEX, \
+ (__v4di)(__m256i)V1, (int)SCALE)
+
+#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv2di ((long long *)ADDR, (__mmask8)0xFF, \
+ (__v4si)(__m128i)INDEX, \
+ (__v2di)(__m128i)V1, (int)SCALE)
+
+#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv2di ((long long *)ADDR, (__mmask8)MASK, \
+ (__v4si)(__m128i)INDEX, \
+ (__v2di)(__m128i)V1, (int)SCALE)
+
+#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8si ((int *)ADDR, (__mmask8)0xFF, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4si)(__m128i)V1, (int)SCALE)
+
+#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8si ((int *)ADDR, (__mmask8)MASK, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4si)(__m128i)V1, (int)SCALE)
+
+#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4si ((int *)ADDR, (__mmask8)0xFF, \
+ (__v2di)(__m128i)INDEX, \
+ (__v4si)(__m128i)V1, (int)SCALE)
+
+#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4si ((int *)ADDR, (__mmask8)MASK, \
+ (__v2di)(__m128i)INDEX, \
+ (__v4si)(__m128i)V1, (int)SCALE)
+
+#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4di ((long long *)ADDR, (__mmask8)0xFF, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4di)(__m256i)V1, (int)SCALE)
+
+#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4di ((long long *)ADDR, (__mmask8)MASK, \
+ (__v4di)(__m256i)INDEX, \
+ (__v4di)(__m256i)V1, (int)SCALE)
+
+#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv2di ((long long *)ADDR, (__mmask8)0xFF, \
+ (__v2di)(__m128i)INDEX, \
+ (__v2di)(__m128i)V1, (int)SCALE)
+
+#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv2di ((long long *)ADDR, (__mmask8)MASK, \
+ (__v2di)(__m128i)INDEX, \
+ (__v2di)(__m128i)V1, (int)SCALE)
+
+#define _mm256_mask_shuffle_epi32(W, U, X, C) \
+ ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_epi32(U, X, C) \
+ ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
+ (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)(U)))
+
+#define _mm_mask_shuffle_epi32(W, U, X, C) \
+ ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_shuffle_epi32(U, X, C) \
+ ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__mmask8)(U)))
+
+#define _mm256_rol_epi64(A, B) \
+ ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)-1))
+
+#define _mm256_mask_rol_epi64(W, U, A, B) \
+ ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_rol_epi64(U, A, B) \
+ ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm_rol_epi64(A, B) \
+ ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)_mm_setzero_di(), \
+ (__mmask8)-1))
+
+#define _mm_mask_rol_epi64(W, U, A, B) \
+ ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_rol_epi64(U, A, B) \
+ ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)_mm_setzero_di(), \
+ (__mmask8)(U)))
+
+#define _mm256_ror_epi64(A, B) \
+ ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)-1))
+
+#define _mm256_mask_ror_epi64(W, U, A, B) \
+ ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_ror_epi64(U, A, B) \
+ ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm_ror_epi64(A, B) \
+ ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)_mm_setzero_di(), \
+ (__mmask8)-1))
+
+#define _mm_mask_ror_epi64(W, U, A, B) \
+ ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_ror_epi64(U, A, B) \
+ ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)_mm_setzero_di(), \
+ (__mmask8)(U)))
+
+#define _mm256_rol_epi32(A, B) \
+ ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__mmask8)-1))
+
+#define _mm256_mask_rol_epi32(W, U, A, B) \
+ ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_rol_epi32(U, A, B) \
+ ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__mmask8)(U)))
+
+#define _mm_rol_epi32(A, B) \
+ ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__mmask8)-1))
+
+#define _mm_mask_rol_epi32(W, U, A, B) \
+ ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_rol_epi32(U, A, B) \
+ ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__mmask8)(U)))
+
+#define _mm256_ror_epi32(A, B) \
+ ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__mmask8)-1))
+
+#define _mm256_mask_ror_epi32(W, U, A, B) \
+ ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_ror_epi32(U, A, B) \
+ ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__mmask8)(U)))
+
+#define _mm_ror_epi32(A, B) \
+ ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__mmask8)-1))
+
+#define _mm_mask_ror_epi32(W, U, A, B) \
+ ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_ror_epi32(U, A, B) \
+ ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__mmask8)(U)))
+
+#define _mm256_alignr_epi32(X, Y, C) \
+ ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(X), (__mmask8)-1))
+
+#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
+ ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
+ ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm256_alignr_epi64(X, Y, C) \
+ ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(X), (__mmask8)-1))
+
+#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
+ ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
+ ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm_alignr_epi32(X, Y, C) \
+ ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(X), (__mmask8)-1))
+
+#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
+ ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_alignr_epi32(U, X, Y, C) \
+ ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128(),\
+ (__mmask8)(U)))
+
+#define _mm_alignr_epi64(X, Y, C) \
+ ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))
+
+#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
+ ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_alignr_epi64(U, X, Y, C) \
+ ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128(),\
+ (__mmask8)(U)))
+
+#define _mm_mask_cvtps_ph(W, U, A, I) \
+ ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) A, (int) (I), \
+ (__v8hi)(__m128i) (W), (__mmask8) (U)))
+
+#define _mm_maskz_cvtps_ph(U, A, I) \
+ ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) A, (int) (I), \
+ (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
+
+#define _mm256_mask_cvtps_ph(W, U, A, I) \
+ ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I), \
+ (__v8hi)(__m128i) (W), (__mmask8) (U)))
+
+#define _mm256_maskz_cvtps_ph(U, A, I) \
+ ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I), \
+ (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
+
+#define _mm256_mask_srai_epi32(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
+ (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_srai_epi32(U, A, B) \
+ ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
+ (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
+
+#define _mm_mask_srai_epi32(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
+ (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srai_epi32(U, A, B) \
+ ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
+ (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
+
+#define _mm256_srai_epi64(A, B) \
+ ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
+ (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))
+
+#define _mm256_mask_srai_epi64(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
+ (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_srai_epi64(U, A, B) \
+ ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
+ (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
+
+#define _mm_srai_epi64(A, B) \
+ ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
+ (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)-1))
+
+#define _mm_mask_srai_epi64(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
+ (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srai_epi64(U, A, B) \
+ ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
+ (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+
+#define _mm256_mask_permutex_pd(W, U, A, B) \
+ ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_permutex_pd(U, A, B) \
+ ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
+
+#define _mm256_mask_permute_pd(W, U, X, C) \
+ ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_permute_pd(U, X, C) \
+ ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm256_mask_permute_ps(W, U, X, C) \
+ ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
+ (__v8sf)(__m256)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_permute_ps(U, X, C) \
+ ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm_mask_permute_pd(W, U, X, C) \
+ ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
+ (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_permute_pd(U, X, C) \
+ ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm_mask_permute_ps(W, U, X, C) \
+ ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
+ (__v4sf)(__m128)(W), (__mmask8)(U)))
+
+#define _mm_maskz_permute_ps(U, X, C) \
+ ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm256_mask_blend_pd(__U, __A, __W) \
+ ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \
+ (__v4df) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm256_mask_blend_ps(__U, __A, __W) \
+ ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \
+ (__v8sf) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm256_mask_blend_epi64(__U, __A, __W) \
+ ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \
+ (__v4di) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm256_mask_blend_epi32(__U, __A, __W) \
+ ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \
+ (__v8si) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm_mask_blend_pd(__U, __A, __W) \
+ ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \
+ (__v2df) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm_mask_blend_ps(__U, __A, __W) \
+ ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \
+ (__v4sf) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm_mask_blend_epi64(__U, __A, __W) \
+ ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \
+ (__v2di) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm_mask_blend_epi32(__U, __A, __W) \
+ ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \
+ (__v4si) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm256_cmp_epu32_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_cmp_epi64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_cmp_epi32_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_cmp_epu64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_cmp_pd_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_cmp_ps_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_cmp_epi64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_cmp_epi32_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_cmp_epu64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_cmp_epu32_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_cmp_pd_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_cmp_ps_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#endif
+
+#define _mm256_mask_ceil_ps(A, B, C) _mm256_mask_roundscale_ps((A), (B), (C), _MM_FROUND_CEIL)
+#define _mm256_mask_floor_ps(A, B, C) _mm256_mask_roundscale_ps((A), (B), (C), _MM_FROUND_FLOOR)
+#define _mm256_mask_ceil_pd(A, B, C) _mm256_mask_roundscale_pd((A), (B), (C), _MM_FROUND_CEIL)
+#define _mm256_mask_floor_pd(A, B, C) _mm256_mask_roundscale_pd((A), (B), (C), _MM_FROUND_FLOOR)
+#define _mm256_maskz_ceil_ps(A, B) _mm256_maskz_roundscale_ps((A), (B), _MM_FROUND_CEIL)
+#define _mm256_maskz_floor_ps(A, B) _mm256_maskz_roundscale_ps((A), (B), _MM_FROUND_FLOOR)
+#define _mm256_maskz_ceil_pd(A, B) _mm256_maskz_roundscale_pd((A), (B), _MM_FROUND_CEIL)
+#define _mm256_maskz_floor_pd(A, B) _mm256_maskz_roundscale_pd((A), (B), _MM_FROUND_FLOOR)
+#define _mm_mask_ceil_ps(A, B, C) _mm_mask_roundscale_ps((A), (B), (C), _MM_FROUND_CEIL)
+#define _mm_mask_floor_ps(A, B, C) _mm_mask_roundscale_ps((A), (B), (C), _MM_FROUND_FLOOR)
+#define _mm_mask_ceil_pd(A, B, C) _mm_mask_roundscale_pd((A), (B), (C), _MM_FROUND_CEIL)
+#define _mm_mask_floor_pd(A, B, C) _mm_mask_roundscale_pd((A), (B), (C), _MM_FROUND_FLOOR)
+#define _mm_maskz_ceil_ps(A, B) _mm_maskz_roundscale_ps((A), (B), _MM_FROUND_CEIL)
+#define _mm_maskz_floor_ps(A, B) _mm_maskz_roundscale_ps((A), (B), _MM_FROUND_FLOOR)
+#define _mm_maskz_ceil_pd(A, B) _mm_maskz_roundscale_pd((A), (B), _MM_FROUND_CEIL)
+#define _mm_maskz_floor_pd(A, B) _mm_maskz_roundscale_pd((A), (B), _MM_FROUND_FLOOR)
+#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
+
+#ifdef __DISABLE_AVX512VL__
+#undef __DISABLE_AVX512VL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VL__ */
+
+#endif /* _AVX512VLINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 7ac22a1683d..a191bd4fd8d 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -73,6 +73,7 @@
#define bit_BMI2 (1 << 8)
#define bit_RTM (1 << 11)
#define bit_AVX512F (1 << 16)
+#define bit_AVX512DQ (1 << 17)
#define bit_RDSEED (1 << 18)
#define bit_ADX (1 << 19)
#define bit_CLFLUSHOPT (1 << 23)
@@ -80,6 +81,8 @@
#define bit_AVX512ER (1 << 27)
#define bit_AVX512CD (1 << 28)
#define bit_SHA (1 << 29)
+#define bit_AVX512BW (1 << 30)
+#define bit_AVX512VL (1 << 31)
/* %ecx */
#define bit_PREFETCHWT1 (1 << 0)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 4cd0b3d2e4b..4935dc63bf0 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -411,6 +411,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
+ unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
bool arch;
@@ -488,6 +489,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_avx512cd = ebx & bit_AVX512CD;
has_sha = ebx & bit_SHA;
has_clflushopt = ebx & bit_CLFLUSHOPT;
+ has_avx512dq = ebx & bit_AVX512DQ;
+ has_avx512bw = ebx & bit_AVX512BW;
+ has_avx512vl = ebx & bit_AVX512VL;
has_prefetchwt1 = ecx & bit_PREFETCHWT1;
}
@@ -900,6 +904,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
+ const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
+ const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
+ const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
@@ -908,7 +915,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
fxsr, xsave, xsaveopt, avx512f, avx512er,
avx512cd, avx512pf, prefetchwt1, clflushopt,
- xsavec, xsaves, NULL);
+ xsavec, xsaves, avx512dq, avx512bw, avx512vl,
+ NULL);
}
done:
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 35c0035a0e6..08a6c90941c 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -56,6 +56,7 @@ DEF_PRIMITIVE_TYPE (UHI, unsigned_intHI_type_node)
DEF_PRIMITIVE_TYPE (USI, unsigned_intSI_type_node)
DEF_PRIMITIVE_TYPE (UDI, long_long_unsigned_type_node)
# ??? Some of the types below should use the mode types above.
+DEF_PRIMITIVE_TYPE (SHORT, short_integer_type_node)
DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
@@ -107,7 +108,14 @@ DEF_VECTOR_TYPE (V16SF, FLOAT)
DEF_VECTOR_TYPE (V8DF, DOUBLE)
DEF_VECTOR_TYPE (V8DI, DI)
DEF_VECTOR_TYPE (V16SI, SI)
+DEF_VECTOR_TYPE (V32HI, HI)
DEF_VECTOR_TYPE (V64QI, QI)
+DEF_VECTOR_TYPE (V12QI, QI)
+DEF_VECTOR_TYPE (V14QI, QI)
+DEF_VECTOR_TYPE (V32SI, SI)
+DEF_VECTOR_TYPE (V8UDI, UDI, V8DI)
+DEF_VECTOR_TYPE (V16USI, USI, V16SI)
+DEF_VECTOR_TYPE (V32UHI, UHI, V32HI)
DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
@@ -119,6 +127,7 @@ DEF_POINTER_TYPE (PCVOID, VOID, CONST)
DEF_POINTER_TYPE (PVOID, VOID)
DEF_POINTER_TYPE (PDOUBLE, DOUBLE)
DEF_POINTER_TYPE (PFLOAT, FLOAT)
+DEF_POINTER_TYPE (PSHORT, SHORT)
DEF_POINTER_TYPE (PUSHORT, USHORT)
DEF_POINTER_TYPE (PINT, INT)
DEF_POINTER_TYPE (PLONGLONG, LONGLONG)
@@ -142,6 +151,9 @@ DEF_POINTER_TYPE (PV16QI, V16QI)
DEF_POINTER_TYPE (PV16HI, V16HI)
DEF_POINTER_TYPE (PV16SI, V16SI)
DEF_POINTER_TYPE (PV16SF, V16SF)
+DEF_POINTER_TYPE (PV32QI, V32QI)
+DEF_POINTER_TYPE (PV32HI, V32HI)
+DEF_POINTER_TYPE (PV64QI, V64QI)
DEF_POINTER_TYPE (PCV2SI, V2SI, CONST)
DEF_POINTER_TYPE (PCV2DF, V2DF, CONST)
@@ -155,9 +167,15 @@ DEF_POINTER_TYPE (PCV16SF, V16SF, CONST)
DEF_POINTER_TYPE (PCV2DI, V2DI, CONST)
DEF_POINTER_TYPE (PCV4SI, V4SI, CONST)
DEF_POINTER_TYPE (PCV4DI, V4DI, CONST)
+DEF_POINTER_TYPE (PCV8HI, V8HI, CONST)
DEF_POINTER_TYPE (PCV8SI, V8SI, CONST)
DEF_POINTER_TYPE (PCV8DI, V8DI, CONST)
+DEF_POINTER_TYPE (PCV16QI, V16QI, CONST)
+DEF_POINTER_TYPE (PCV16HI, V16HI, CONST)
DEF_POINTER_TYPE (PCV16SI, V16SI, CONST)
+DEF_POINTER_TYPE (PCV32QI, V32QI, CONST)
+DEF_POINTER_TYPE (PCV32HI, V32HI, CONST)
+DEF_POINTER_TYPE (PCV64QI, V64QI, CONST)
DEF_FUNCTION_TYPE (FLOAT128)
DEF_FUNCTION_TYPE (UINT64)
@@ -217,12 +235,15 @@ DEF_FUNCTION_TYPE (V8DF, V8DF)
DEF_FUNCTION_TYPE (V4HI, V4HI)
DEF_FUNCTION_TYPE (V4SF, PCFLOAT)
DEF_FUNCTION_TYPE (V4SF, V2DF)
+DEF_FUNCTION_TYPE (V4SF, V2DF, V4SF, QI)
DEF_FUNCTION_TYPE (V4SF, V4DF)
+DEF_FUNCTION_TYPE (V4SF, V4DF, V4SF, QI)
DEF_FUNCTION_TYPE (V4SF, V4SF)
DEF_FUNCTION_TYPE (V4SF, PCV4SF)
DEF_FUNCTION_TYPE (V4SF, V4SI)
DEF_FUNCTION_TYPE (V4SF, V8SF)
DEF_FUNCTION_TYPE (V4SF, V8HI)
+DEF_FUNCTION_TYPE (V4SF, V8HI, V4SF, QI)
DEF_FUNCTION_TYPE (V4SI, V16QI)
DEF_FUNCTION_TYPE (V4SI, V2DF)
DEF_FUNCTION_TYPE (V4SI, V4DF)
@@ -241,6 +262,7 @@ DEF_FUNCTION_TYPE (V8SF, V4SF)
DEF_FUNCTION_TYPE (V8SF, V8SF)
DEF_FUNCTION_TYPE (V8SF, V8SI)
DEF_FUNCTION_TYPE (V8SF, V8HI)
+DEF_FUNCTION_TYPE (V8SF, V8HI, V8SF, QI)
DEF_FUNCTION_TYPE (V16SF, V16SF)
DEF_FUNCTION_TYPE (V8SI, V8DI)
DEF_FUNCTION_TYPE (V8SI, V4SI)
@@ -251,6 +273,9 @@ DEF_FUNCTION_TYPE (V32QI, V32QI)
DEF_FUNCTION_TYPE (V32QI, V16QI)
DEF_FUNCTION_TYPE (V16HI, V16SI)
DEF_FUNCTION_TYPE (V16HI, V16HI)
+DEF_FUNCTION_TYPE (V16SF, V16HI)
+DEF_FUNCTION_TYPE (V16SF, V16HI, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SI)
DEF_FUNCTION_TYPE (V16HI, V8HI)
DEF_FUNCTION_TYPE (V8SI, V8SI)
DEF_FUNCTION_TYPE (VOID, PCVOID)
@@ -266,13 +291,16 @@ DEF_FUNCTION_TYPE (V4DI, V16QI)
DEF_FUNCTION_TYPE (V8SI, V8HI)
DEF_FUNCTION_TYPE (V4DI, V8HI)
DEF_FUNCTION_TYPE (V4DI, V4SI)
+DEF_FUNCTION_TYPE (V4DI, V4DI)
DEF_FUNCTION_TYPE (V4DI, PV4DI)
DEF_FUNCTION_TYPE (V4DI, V2DI)
+DEF_FUNCTION_TYPE (V16SI, V16SF)
DEF_FUNCTION_TYPE (V16SF, FLOAT)
DEF_FUNCTION_TYPE (V16SI, INT)
DEF_FUNCTION_TYPE (V8DF, DOUBLE)
DEF_FUNCTION_TYPE (V8DI, INT64)
DEF_FUNCTION_TYPE (V16SF, V4SF)
+DEF_FUNCTION_TYPE (V16SI, V4SI)
DEF_FUNCTION_TYPE (V8DF, V4DF)
DEF_FUNCTION_TYPE (V8DI, V4DI)
DEF_FUNCTION_TYPE (V16QI, V8DI)
@@ -286,10 +314,38 @@ DEF_FUNCTION_TYPE (V8DI, V8DI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, QI)
DEF_FUNCTION_TYPE (V16SI, PV4SI)
DEF_FUNCTION_TYPE (V16SF, PV4SF)
+DEF_FUNCTION_TYPE (V8DI, PV2DI)
+DEF_FUNCTION_TYPE (V8DF, PV2DF)
+DEF_FUNCTION_TYPE (V4DI, PV2DI)
+DEF_FUNCTION_TYPE (V4DF, PV2DF)
+DEF_FUNCTION_TYPE (V16SI, PV2SI)
+DEF_FUNCTION_TYPE (V16SF, PV2SF)
DEF_FUNCTION_TYPE (V8DI, PV4DI)
DEF_FUNCTION_TYPE (V8DF, PV4DF)
+DEF_FUNCTION_TYPE (V8SF, FLOAT)
+DEF_FUNCTION_TYPE (V4SF, FLOAT)
+DEF_FUNCTION_TYPE (V4DF, DOUBLE)
+DEF_FUNCTION_TYPE (V8SF, PV4SF)
+DEF_FUNCTION_TYPE (V8SI, PV4SI)
+DEF_FUNCTION_TYPE (V4SI, PV2SI)
+DEF_FUNCTION_TYPE (V8SF, PV2SF)
+DEF_FUNCTION_TYPE (V8SI, PV2SI)
+DEF_FUNCTION_TYPE (V16SF, PV8SF)
+DEF_FUNCTION_TYPE (V16SI, PV8SI)
+DEF_FUNCTION_TYPE (V8DI, V8SF)
+DEF_FUNCTION_TYPE (V4DI, V4SF)
+DEF_FUNCTION_TYPE (V2DI, V4SF)
+DEF_FUNCTION_TYPE (V64QI, QI)
+DEF_FUNCTION_TYPE (V32HI, HI)
DEF_FUNCTION_TYPE (V8UHI, V8UHI)
+DEF_FUNCTION_TYPE (V16UHI, V16UHI)
+DEF_FUNCTION_TYPE (V32UHI, V32UHI)
+DEF_FUNCTION_TYPE (V2UDI, V2UDI)
+DEF_FUNCTION_TYPE (V4UDI, V4UDI)
+DEF_FUNCTION_TYPE (V8UDI, V8UDI)
+DEF_FUNCTION_TYPE (V4USI, V4USI)
DEF_FUNCTION_TYPE (V8USI, V8USI)
+DEF_FUNCTION_TYPE (V16USI, V16USI)
DEF_FUNCTION_TYPE (V8DI, PV8DI)
DEF_FUNCTION_TYPE (DI, V2DI, INT)
@@ -332,9 +388,13 @@ DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF)
DEF_FUNCTION_TYPE (V2DF, V4DF, INT)
+DEF_FUNCTION_TYPE (V2DF, V4DF, INT, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V8DF, INT)
+DEF_FUNCTION_TYPE (V2DF, V8DF, INT, V2DF, QI)
DEF_FUNCTION_TYPE (V2DI, V16QI, V16QI)
DEF_FUNCTION_TYPE (V2DI, V2DF, V2DF)
DEF_FUNCTION_TYPE (V2DI, V2DI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, INT, V2DI, QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, SI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V16QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI)
@@ -349,6 +409,7 @@ DEF_FUNCTION_TYPE (V2SI, V2SI, V2SI)
DEF_FUNCTION_TYPE (V2SI, V4HI, V4HI)
DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DI)
DEF_FUNCTION_TYPE (V4DF, V4DF, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, INT)
DEF_FUNCTION_TYPE (V4DF, V8DF, INT)
DEF_FUNCTION_TYPE (V4DF, V8DF, INT, V4DF, QI)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF)
@@ -373,25 +434,36 @@ DEF_FUNCTION_TYPE (V4SF, V4SF, V2SI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SI)
DEF_FUNCTION_TYPE (V4SF, V8SF, INT)
+DEF_FUNCTION_TYPE (V4SF, V8SF, INT, V4SF, QI)
DEF_FUNCTION_TYPE (V4SI, V2DF, V2DF)
+DEF_FUNCTION_TYPE (V4SI, V2DF, V4SI, QI)
DEF_FUNCTION_TYPE (V4SI, V4SF, V4SF)
DEF_FUNCTION_TYPE (V4SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, INT, V4SI, QI)
DEF_FUNCTION_TYPE (V4SI, V4SI, SI)
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI)
DEF_FUNCTION_TYPE (V4SI, V8HI, V8HI)
+DEF_FUNCTION_TYPE (V4SI, V8HI, V8HI, V4SI, QI)
DEF_FUNCTION_TYPE (V4SI, V8SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V8SI, INT, V4SI, QI)
DEF_FUNCTION_TYPE (V4SI, PCV4SI, V4SI)
DEF_FUNCTION_TYPE (V8HI, V16QI, V16QI)
+DEF_FUNCTION_TYPE (V8HI, V16QI, V16QI, V8HI, QI)
DEF_FUNCTION_TYPE (V8HI, V4SI, V4SI)
DEF_FUNCTION_TYPE (V8HI, V8HI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, INT)
DEF_FUNCTION_TYPE (V8HI, V8HI, SI)
DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI)
DEF_FUNCTION_TYPE (V8HI, V8SF, INT)
+DEF_FUNCTION_TYPE (V8HI, V8SF, INT, V8HI, QI)
DEF_FUNCTION_TYPE (V8HI, V4SF, INT)
+DEF_FUNCTION_TYPE (V8HI, V4SF, INT, V8HI, QI)
DEF_FUNCTION_TYPE (V8QI, V4HI, V4HI)
DEF_FUNCTION_TYPE (V8QI, V8QI, V8QI)
DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SI)
DEF_FUNCTION_TYPE (V8SF, V8SF, INT)
+DEF_FUNCTION_TYPE (V8SF, V16SF, INT)
+DEF_FUNCTION_TYPE (V8SF, V16SF, INT, V8SF, QI)
DEF_FUNCTION_TYPE (V16SF, V16SF, INT)
DEF_FUNCTION_TYPE (V4SF, V16SF, INT)
DEF_FUNCTION_TYPE (V4SF, V16SF, INT, V4SF, QI)
@@ -400,29 +472,46 @@ DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI)
DEF_FUNCTION_TYPE (V32QI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V64QI, V32HI, V32HI)
DEF_FUNCTION_TYPE (V16HI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (V32HI, V16SI, V16SI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V2DF, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V2DF, INT, V8DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V4DF, INT, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, QI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DI, INT, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI, INT, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, QI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DI, INT, QI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, HI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, HI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, INT, V16SI, HI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT, HI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT, HI, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI, INT, QI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT, QI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT, QI, INT)
-DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, QI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, QI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V4SF, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V4SF, INT, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V8SF, INT, V16SF, HI)
DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI)
DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI)
+DEF_FUNCTION_TYPE (V32HI, V64QI, V64QI)
+DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI, V16HI, HI)
+DEF_FUNCTION_TYPE (V32HI, V64QI, V64QI, V32HI, SI)
DEF_FUNCTION_TYPE (V16HI, V16HI, V8HI)
DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, INT)
DEF_FUNCTION_TYPE (V16HI, V16HI, INT)
DEF_FUNCTION_TYPE (V16HI, V16SF, INT)
DEF_FUNCTION_TYPE (V16HI, V16SF, INT, V16HI, HI)
@@ -434,13 +523,20 @@ DEF_FUNCTION_TYPE (V8SI, V4DF, V4DF)
DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V8SI, INT, V16SI, HI)
DEF_FUNCTION_TYPE (V4SI, V16SI, INT)
DEF_FUNCTION_TYPE (V4SI, V16SI, INT, V4SI, QI)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, INT, V16SI, HI)
DEF_FUNCTION_TYPE (V8SI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V16SI, V32HI, V32HI)
+DEF_FUNCTION_TYPE (V8SI, V16HI, V16HI, V8SI, QI)
+DEF_FUNCTION_TYPE (V16SI, V32HI, V32HI, V16SI, HI)
DEF_FUNCTION_TYPE (V8SI, V8SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, INT, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, V16SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V16SI, INT, V8SI, QI)
DEF_FUNCTION_TYPE (V8SI, V8SI, SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, INT)
@@ -451,9 +547,17 @@ DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI)
DEF_FUNCTION_TYPE (V16SI, V8DF, V8DF)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, INT, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, INT, V8DI, DI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, INT, V4DI, SI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT, V2DI, HI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V4DI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI, INT, V8DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V4DI, INT, V8DI, QI)
DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V4UDI, V8USI, V8USI)
+DEF_FUNCTION_TYPE (V8DI, V16SI, V16SI)
+DEF_FUNCTION_TYPE (V8DI, V64QI, V64QI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI)
DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI)
@@ -462,8 +566,12 @@ DEF_FUNCTION_TYPE (V4DI, V8DI, INT, V4DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI, V8DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8DI, INT, V8DI, QI)
DEF_FUNCTION_TYPE (V4DI, V4DI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, INT, V4DI, QI)
DEF_FUNCTION_TYPE (V2DI, V4DI, INT)
DEF_FUNCTION_TYPE (VOID, PVOID, INT64)
+DEF_FUNCTION_TYPE (V2DI, V4DI, INT, V2DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V8DI, INT)
+DEF_FUNCTION_TYPE (V2DI, V8DI, INT, V2DI, QI)
DEF_FUNCTION_TYPE (VOID, PCHAR, V16QI)
DEF_FUNCTION_TYPE (VOID, PCHAR, V32QI)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF)
@@ -485,9 +593,72 @@ DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED)
DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI)
# Instructions returning mask
+DEF_FUNCTION_TYPE (QI, QI)
DEF_FUNCTION_TYPE (HI, HI)
+DEF_FUNCTION_TYPE (SI, SI)
+DEF_FUNCTION_TYPE (DI, DI)
+DEF_FUNCTION_TYPE (HI, V16QI)
+DEF_FUNCTION_TYPE (SI, V32QI)
+DEF_FUNCTION_TYPE (DI, V64QI)
+DEF_FUNCTION_TYPE (QI, V8HI)
+DEF_FUNCTION_TYPE (HI, V16HI)
+DEF_FUNCTION_TYPE (SI, V32HI)
+DEF_FUNCTION_TYPE (QI, V4SI)
+DEF_FUNCTION_TYPE (QI, V8SI)
+DEF_FUNCTION_TYPE (HI, V16SI)
+DEF_FUNCTION_TYPE (QI, V2DI)
+DEF_FUNCTION_TYPE (QI, V4DI)
+DEF_FUNCTION_TYPE (QI, V8DI)
+DEF_FUNCTION_TYPE (V16QI, HI)
+DEF_FUNCTION_TYPE (V32QI, SI)
+DEF_FUNCTION_TYPE (V64QI, DI)
+DEF_FUNCTION_TYPE (V8HI, QI)
+DEF_FUNCTION_TYPE (V16HI, HI)
+DEF_FUNCTION_TYPE (V32HI, SI)
+DEF_FUNCTION_TYPE (V4SI, QI)
+DEF_FUNCTION_TYPE (V4SI, HI)
+DEF_FUNCTION_TYPE (V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, HI)
+DEF_FUNCTION_TYPE (V2DI, QI)
+DEF_FUNCTION_TYPE (V4DI, QI)
+DEF_FUNCTION_TYPE (QI, QI, QI)
DEF_FUNCTION_TYPE (HI, HI, HI)
+DEF_FUNCTION_TYPE (SI, SI, SI)
+DEF_FUNCTION_TYPE (DI, DI, DI)
+DEF_FUNCTION_TYPE (QI, QI, INT)
DEF_FUNCTION_TYPE (HI, HI, INT)
+DEF_FUNCTION_TYPE (SI, SI, INT)
+DEF_FUNCTION_TYPE (DI, DI, INT)
+DEF_FUNCTION_TYPE (HI, V16QI, V16QI)
+DEF_FUNCTION_TYPE (HI, V16QI, V16QI, HI)
+DEF_FUNCTION_TYPE (HI, V16QI, V16QI, INT, HI)
+DEF_FUNCTION_TYPE (SI, V32QI, V32QI)
+DEF_FUNCTION_TYPE (SI, V32QI, V32QI, SI)
+DEF_FUNCTION_TYPE (SI, V32QI, V32QI, INT, SI)
+DEF_FUNCTION_TYPE (DI, V64QI, V64QI)
+DEF_FUNCTION_TYPE (DI, V64QI, V64QI, DI)
+DEF_FUNCTION_TYPE (DI, V64QI, V64QI, INT, DI)
+DEF_FUNCTION_TYPE (QI, V8HI, V8HI)
+DEF_FUNCTION_TYPE (QI, V8HI, V8HI, QI)
+DEF_FUNCTION_TYPE (QI, V8HI, V8HI, INT, QI)
+DEF_FUNCTION_TYPE (HI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (HI, V16HI, V16HI, HI)
+DEF_FUNCTION_TYPE (HI, V16HI, V16HI, INT, HI)
+DEF_FUNCTION_TYPE (SI, V32HI, V32HI)
+DEF_FUNCTION_TYPE (SI, V32HI, V32HI, SI)
+DEF_FUNCTION_TYPE (SI, V32HI, V32HI, INT, SI)
+DEF_FUNCTION_TYPE (QI, V4SI, V4SI)
+DEF_FUNCTION_TYPE (QI, V4SI, V4SI, QI)
+DEF_FUNCTION_TYPE (QI, V4SI, V4SI, INT, QI)
+DEF_FUNCTION_TYPE (QI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (QI, V8SI, V8SI, QI)
+DEF_FUNCTION_TYPE (QI, V8SI, V8SI, INT, QI)
+DEF_FUNCTION_TYPE (QI, V2DI, V2DI)
+DEF_FUNCTION_TYPE (QI, V2DI, V2DI, QI)
+DEF_FUNCTION_TYPE (QI, V2DI, V2DI, INT, QI)
+DEF_FUNCTION_TYPE (QI, V4DI, V4DI)
+DEF_FUNCTION_TYPE (QI, V4DI, V4DI, QI)
+DEF_FUNCTION_TYPE (QI, V4DI, V4DI, INT, QI)
DEF_FUNCTION_TYPE (QI, V8DI, V8DI)
DEF_FUNCTION_TYPE (QI, V8DI, V8DI, QI)
DEF_FUNCTION_TYPE (HI, V16SI, V16SI)
@@ -514,6 +685,7 @@ DEF_FUNCTION_TYPE (V8DI, QI)
DEF_FUNCTION_TYPE (INT, V16QI, V16QI, INT)
DEF_FUNCTION_TYPE (UCHAR, UINT, UINT, UINT)
DEF_FUNCTION_TYPE (UCHAR, UINT64, UINT, UINT)
+DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI)
DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, V16HI)
DEF_FUNCTION_TYPE (V16QI, V16QI, QI, INT)
DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, INT)
@@ -521,7 +693,10 @@ DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, V16QI)
DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF)
+DEF_FUNCTION_TYPE (V2DF, V2DI, V2DF, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DI, V2DF, QI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, DI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, UINT, UINT)
@@ -529,9 +704,11 @@ DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI)
DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, V32QI)
DEF_FUNCTION_TYPE (V4DF, V4DF, V2DF, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V2DF, INT, V4DF, QI)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, INT)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DF)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, INT)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V4HI, V4HI, HI, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, FLOAT, INT)
@@ -540,6 +717,10 @@ DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF)
+DEF_FUNCTION_TYPE (V4SF, V4SI, V4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SI, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT)
DEF_FUNCTION_TYPE (V4SI, V4SI, SI, INT)
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT)
@@ -550,76 +731,263 @@ DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, INT)
DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V4SI)
DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V4SF, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V4SF, INT, V8SF, QI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SF)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF)
DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT, V8SI, QI)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, INT)
DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI, INT, V4DI, QI)
# Instructions with masking
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V4SF, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V4SI, V2DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4SF, V4DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4SI, V4DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8SF, V8DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8SI, V8DF, QI)
+DEF_FUNCTION_TYPE (V2DI, V4SI, V2DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V8HI, V2DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8DF, V8DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DF, V4DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V2DF, V2DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT, V2DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, INT, V4DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8SI, V8DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8HI, V8DI, QI)
DEF_FUNCTION_TYPE (V8DI, V16QI, V8DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V16QI, V2DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V16QI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4SI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V8HI, V4DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, QI)
DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, V8DF)
DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, V8DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DI, V8DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DI, V4DF, V4DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI, V4DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, V16QI, HI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, V16HI, HI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, HI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V64QI, V64QI, V64QI, V64QI, DI)
+DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, V32QI, SI)
+DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SI, V4SF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SI, V8SF, QI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, HI)
DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, HI)
+DEF_FUNCTION_TYPE (V4SI, V16QI, V4SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V8HI, V4SI, QI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, V8HI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, V16QI, V8SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT, V4SI, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SI, V8SF, V8SF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI, V8SF, QI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT, V8SI, QI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, HI)
DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, V16SF)
DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, V16SF, HI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI, V16SF, HI)
-DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI)
-DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V16SF, V8SF, V16SF, HI)
DEF_FUNCTION_TYPE (V16SF, V4SF, V16SF, HI)
DEF_FUNCTION_TYPE (V8DF, V4DF, V8DF, QI)
DEF_FUNCTION_TYPE (V8DF, V2DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V16SI, V8SI, V16SI, HI)
DEF_FUNCTION_TYPE (V16SI, V4SI, V16SI, HI)
DEF_FUNCTION_TYPE (V16SI, SI, V16SI, HI)
DEF_FUNCTION_TYPE (V16SI, V16HI, V16SI, HI)
DEF_FUNCTION_TYPE (V16SI, V16QI, V16SI, HI)
DEF_FUNCTION_TYPE (V8SI, V8DF, V8SI, QI)
DEF_FUNCTION_TYPE (V8DI, V4DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4DF, V4SI, QI)
DEF_FUNCTION_TYPE (V8DI, V2DI, V8DI, QI)
DEF_FUNCTION_TYPE (V8DI, DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, PCCHAR, V8DI, QI)
+DEF_FUNCTION_TYPE (V8SF, PCFLOAT, V8SF, QI)
+DEF_FUNCTION_TYPE (V4SF, PCFLOAT, V4SF, QI)
+DEF_FUNCTION_TYPE (V4DF, PCDOUBLE, V4DF, QI)
+DEF_FUNCTION_TYPE (V2DF, PCDOUBLE, V2DF, QI)
+DEF_FUNCTION_TYPE (V8SI, PCCHAR, V8SI, QI)
+DEF_FUNCTION_TYPE (V4SI, PCCHAR, V4SI, QI)
+DEF_FUNCTION_TYPE (V4DI, PCCHAR, V4DI, QI)
+DEF_FUNCTION_TYPE (V2DI, PCCHAR, V2DI, QI)
+DEF_FUNCTION_TYPE (V16QI, V16SI, V16QI, HI)
+DEF_FUNCTION_TYPE (V16QI, V8DI, V16QI, QI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V32HI, V64QI, V64QI, INT)
+DEF_FUNCTION_TYPE (V32HI, V32QI, V32HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, HI)
+DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI, INT)
+DEF_FUNCTION_TYPE (V16HI, V16QI, V16HI, HI)
+DEF_FUNCTION_TYPE (V8HI, V16QI, V8HI, QI)
+DEF_FUNCTION_TYPE (V8HI, V16QI, V16QI, INT)
+DEF_FUNCTION_TYPE (V8SF, V4SF, V8SF, QI)
+DEF_FUNCTION_TYPE (V4DF, V2DF, V4DF, QI)
+DEF_FUNCTION_TYPE (V8SI, V4SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, QI)
+DEF_FUNCTION_TYPE (V4SI, SI, V4SI, QI)
+DEF_FUNCTION_TYPE (V4DI, V2DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, QI)
+DEF_FUNCTION_TYPE (V2DI, DI, V2DI, QI)
+DEF_FUNCTION_TYPE (V64QI, V64QI, V64QI, DI)
+DEF_FUNCTION_TYPE (V64QI, V16QI, V64QI, DI)
+DEF_FUNCTION_TYPE (V64QI, QI, V64QI, DI)
+DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, SI)
+DEF_FUNCTION_TYPE (V32QI, V16QI, V32QI, SI)
+DEF_FUNCTION_TYPE (V32QI, QI, V32QI, SI)
+DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, HI)
+DEF_FUNCTION_TYPE (V16QI, QI, V16QI, HI)
+DEF_FUNCTION_TYPE (V32HI, V8HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V32HI, HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V8HI, V16HI, HI)
+DEF_FUNCTION_TYPE (V16HI, HI, V16HI, HI)
+DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, QI)
+DEF_FUNCTION_TYPE (V8HI, HI, V8HI, QI)
+DEF_FUNCTION_TYPE (V64QI, PCV64QI, V64QI, DI)
+DEF_FUNCTION_TYPE (V32HI, PCV32HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V32QI, PCV32QI, V32QI, SI)
DEF_FUNCTION_TYPE (V16SF, PCV16SF, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, PCV8SF, V16SF, HI)
DEF_FUNCTION_TYPE (V8DF, PCV8DF, V8DF, QI)
DEF_FUNCTION_TYPE (V16SI, PCV16SI, V16SI, HI)
+DEF_FUNCTION_TYPE (V16SI, PCV8SI, V16SI, HI)
+DEF_FUNCTION_TYPE (V16HI, PCV16HI, V16HI, HI)
+DEF_FUNCTION_TYPE (V16QI, PCV16QI, V16QI, HI)
+DEF_FUNCTION_TYPE (V8DF, PCV2DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SF, QI)
+DEF_FUNCTION_TYPE (V8SF, PCV4SF, V8SF, QI)
DEF_FUNCTION_TYPE (V8DI, PCV8DI, V8DI, QI)
-DEF_FUNCTION_TYPE (V2DF, PCDOUBLE, V2DF, QI)
-DEF_FUNCTION_TYPE (V4SF, PCFLOAT, V4SF, QI)
-DEF_FUNCTION_TYPE (V16QI, V16SI, V16QI, HI)
+DEF_FUNCTION_TYPE (V8DI, PCV2DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, PCV4SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8HI, PCV8HI, V8HI, QI)
+DEF_FUNCTION_TYPE (V4DF, PCV2DF, V4DF, QI)
+DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DF, QI)
+DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, PCV2DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4SI, PCV4SI, V4SI, QI)
+DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI, QI)
DEF_FUNCTION_TYPE (V16HI, V16SI, V16HI, HI)
DEF_FUNCTION_TYPE (V8SI, V8DI, V8SI, QI)
DEF_FUNCTION_TYPE (V8HI, V8DI, V8HI, QI)
-DEF_FUNCTION_TYPE (V16QI, V8DI, V16QI, QI)
+DEF_FUNCTION_TYPE (V16QI, V8HI, V16QI, QI)
+DEF_FUNCTION_TYPE (V16QI, V16HI, V16QI, HI)
+DEF_FUNCTION_TYPE (V16QI, V4SI, V16QI, QI)
+DEF_FUNCTION_TYPE (V16QI, V8SI, V16QI, QI)
+DEF_FUNCTION_TYPE (V8HI, V4SI, V8HI, QI)
+DEF_FUNCTION_TYPE (V8HI, V8SI, V8HI, QI)
+DEF_FUNCTION_TYPE (V16QI, V2DI, V16QI, QI)
+DEF_FUNCTION_TYPE (V16QI, V4DI, V16QI, QI)
+DEF_FUNCTION_TYPE (V8HI, V2DI, V8HI, QI)
+DEF_FUNCTION_TYPE (V8HI, V4DI, V8HI, QI)
+DEF_FUNCTION_TYPE (V4SI, V2DI, V4SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4DI, V4SI, QI)
+DEF_FUNCTION_TYPE (V32QI, V32HI, V32QI, SI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, INT, V2DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, INT, V4DF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, INT, V4SF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, INT, V8SF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, INT, V4DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, V2DF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT, V8SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, V4SF, QI)
DEF_FUNCTION_TYPE (VOID, PV8DF, V8DF, QI)
DEF_FUNCTION_TYPE (VOID, PV8SI, V8DI, QI)
DEF_FUNCTION_TYPE (VOID, PV8HI, V8DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8HI, V4DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8HI, V2DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV4SI, V4DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV4SI, V2DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8HI, V8SI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8HI, V4SI, QI)
+DEF_FUNCTION_TYPE (VOID, PV4DF, V4DF, QI)
+DEF_FUNCTION_TYPE (VOID, PV2DF, V2DF, QI)
DEF_FUNCTION_TYPE (VOID, PV16SF, V16SF, HI)
+DEF_FUNCTION_TYPE (VOID, PV8SF, V8SF, QI)
+DEF_FUNCTION_TYPE (VOID, PV4SF, V4SF, QI)
DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV4DI, V4DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV2DI, V2DI, QI)
DEF_FUNCTION_TYPE (VOID, PV16SI, V16SI, HI)
DEF_FUNCTION_TYPE (VOID, PV16HI, V16SI, HI)
DEF_FUNCTION_TYPE (VOID, PV16QI, V16SI, HI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V8SI, QI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V4SI, QI)
DEF_FUNCTION_TYPE (VOID, PV16QI, V8DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V4DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V2DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8SI, V8SI, QI)
+DEF_FUNCTION_TYPE (VOID, PV4SI, V4SI, QI)
+DEF_FUNCTION_TYPE (VOID, PV32HI, V32HI, SI)
+DEF_FUNCTION_TYPE (VOID, PV16HI, V16HI, HI)
+DEF_FUNCTION_TYPE (VOID, PV8HI, V8HI, QI)
+DEF_FUNCTION_TYPE (VOID, PV64QI, V64QI, DI)
+DEF_FUNCTION_TYPE (VOID, PV32QI, V32QI, SI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V16QI, HI)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF, QI)
DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF, QI)
DEF_FUNCTION_TYPE (V16SI, V16SF, V16SI, HI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT, QI)
+DEF_FUNCTION_TYPE (V8SI, V8SF, V8SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4SF, V4SI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8SF, V8DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4SF, V4DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V4SF, V2DI, QI)
+DEF_FUNCTION_TYPE (V8SF, V8DI, V8SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4DI, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V2DI, V4SF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DI, V4DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DI, V2DF, QI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, INT, V32HI, SI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, V8HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, INT, V16HI, HI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V8HI, V16HI, HI)
+DEF_FUNCTION_TYPE (V8HI, V8HI, INT, V8HI, QI)
+DEF_FUNCTION_TYPE (V32HI, V64QI, V64QI, INT, V32HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI, INT, V16HI, HI)
+DEF_FUNCTION_TYPE (V8HI, V16QI, V16QI, INT, V8HI, QI)
+DEF_FUNCTION_TYPE (V64QI, V32HI, V32HI, V64QI, DI)
+DEF_FUNCTION_TYPE (V32QI, V16HI, V16HI, V32QI, SI)
+DEF_FUNCTION_TYPE (V16QI, V8HI, V8HI, V16QI, HI)
+DEF_FUNCTION_TYPE (V32HI, V16SI, V16SI, V32HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V8SI, V8SI, V16HI, HI)
+DEF_FUNCTION_TYPE (V8HI, V4SI, V4SI, V8HI, QI)
+DEF_FUNCTION_TYPE (V8DI, V16SI, V16SI, V8DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI, V4DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V4SI, V4SI, V2DI, QI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT, HI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI, INT, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, INT, QI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI, INT, QI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT, QI)
DEF_FUNCTION_TYPE (VOID, PCVOID, UNSIGNED, UNSIGNED)
DEF_FUNCTION_TYPE (VOID, PV2DF, V2DI, V2DF)
@@ -644,11 +1012,30 @@ DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DI, V2DF)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI, V4DF)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DI, V8DF)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SI, V4SF)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI, V8SF)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI, V16SF)
-DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, V4SF, QI)
-DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, V2DF, QI)
-DEF_FUNCTION_TYPE (V8DI, V16SI, V16SI, V8DI, QI)
+
+DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI, V4DI, QI)
+
+DEF_FUNCTION_TYPE (QI, V8DF, INT)
+DEF_FUNCTION_TYPE (QI, V4DF, INT)
+DEF_FUNCTION_TYPE (QI, V4DF, V4DF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V2DF, INT)
+DEF_FUNCTION_TYPE (HI, V16SF, INT)
+DEF_FUNCTION_TYPE (QI, V8SF, INT)
+DEF_FUNCTION_TYPE (QI, V8SF, V8SF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V4SF, INT)
+DEF_FUNCTION_TYPE (QI, V8DF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V4DF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V2DF, INT, QI)
+DEF_FUNCTION_TYPE (HI, V16SF, INT, HI)
+DEF_FUNCTION_TYPE (QI, V8SF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V4SF, INT, QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT)
DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI)
@@ -684,6 +1071,10 @@ DEF_FUNCTION_TYPE (V8SI, V8DF, V8SI, QI, INT)
DEF_FUNCTION_TYPE (V8SF, V8DF, V8SF, QI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, QI, INT)
DEF_FUNCTION_TYPE (V8DF, V8SF, V8DF, QI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DF, V8DI, QI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8SF, V8DI, QI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, QI, INT)
+DEF_FUNCTION_TYPE (V8SF, V8DI, V8SF, QI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, HI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF, QI, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI, INT)
@@ -732,16 +1123,53 @@ DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V8SI, QI, INT)
DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V16SI, QI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V8DI, QI, INT)
DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V8DI, QI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE, V2DI, QI, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V4DI, QI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, PCFLOAT, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V2DI, QI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V4DI, QI, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, PCFLOAT, V4DI, QI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, PCINT64, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, PCINT64, V2DI, QI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V4DI, QI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V2DI, QI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V4DI, QI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V4DI, QI, INT)
+
DEF_FUNCTION_TYPE (VOID, PFLOAT, HI, V16SI, V16SF, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V8SI, V8SF, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V4SI, V4SF, INT)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V8SI, V8DF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V4SI, V4DF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V4SI, V2DF, INT)
DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V8DI, V8SF, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V4DI, V4SF, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V2DI, V4SF, INT)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V8DI, V8DF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V4DI, V4DF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V2DI, V2DF, INT)
DEF_FUNCTION_TYPE (VOID, PINT, HI, V16SI, V16SI, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V8SI, V8SI, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V4SI, V4SI, INT)
DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8SI, V8DI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4SI, V4DI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4SI, V2DI, INT)
DEF_FUNCTION_TYPE (VOID, PINT, QI, V8DI, V8SI, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V4DI, V4SI, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V2DI, V4SI, INT)
DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCINT64, INT, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4DI, V4DI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V2DI, V2DI, INT)
DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCINT, INT, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT64, INT, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT, INT, INT)
@@ -789,8 +1217,13 @@ DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_V2DI, COUNT)
DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF_V2DF, SWAP)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, SWAP)
+DEF_FUNCTION_TYPE_ALIAS (V8DI_FTYPE_V8DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_INT, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V8DI_FTYPE_V8DI_V8DI_INT, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_V4DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V1DI_FTYPE_V1DI_V1DI_INT, CONVERT)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 2c05cec0c4b..a3858edb240 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -345,6 +345,12 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__AVX512CD__");
if (isa_flag & OPTION_MASK_ISA_AVX512PF)
def_or_undef (parse_in, "__AVX512PF__");
+ if (isa_flag & OPTION_MASK_ISA_AVX512DQ)
+ def_or_undef (parse_in, "__AVX512DQ__");
+ if (isa_flag & OPTION_MASK_ISA_AVX512BW)
+ def_or_undef (parse_in, "__AVX512BW__");
+ if (isa_flag & OPTION_MASK_ISA_AVX512VL)
+ def_or_undef (parse_in, "__AVX512VL__");
if (isa_flag & OPTION_MASK_ISA_FMA)
def_or_undef (parse_in, "__FMA__");
if (isa_flag & OPTION_MASK_ISA_RTM)
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 07e572058cc..c24abe6fea7 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -86,6 +86,9 @@ VECTOR_MODE (INT, TI, 1); /* V1TI */
VECTOR_MODE (INT, DI, 1); /* V1DI */
VECTOR_MODE (INT, SI, 1); /* V1SI */
VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODE (INT, QI, 12); /* V12QI */
+VECTOR_MODE (INT, QI, 14); /* V14QI */
+VECTOR_MODE (INT, HI, 6); /* V6HI */
INT_MODE (OI, 32);
INT_MODE (XI, 64);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 542945f1176..b385bd00179 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2593,6 +2593,9 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
{ "-mavx512er", OPTION_MASK_ISA_AVX512ER },
{ "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
{ "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
+ { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
+ { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
+ { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
{ "-msse4a", OPTION_MASK_ISA_SSE4A },
{ "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
{ "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
@@ -3123,6 +3126,9 @@ ix86_option_override_internal (bool main_args_p,
#define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
#define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
#define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
+#define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
+#define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
+#define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
#define PTA_CORE2 \
(PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
@@ -3689,6 +3695,15 @@ ix86_option_override_internal (bool main_args_p,
if (processor_alias_table[i].flags & PTA_XSAVES
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
+ if (processor_alias_table[i].flags & PTA_AVX512DQ
+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
+ if (processor_alias_table[i].flags & PTA_AVX512BW
+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
+ if (processor_alias_table[i].flags & PTA_AVX512VL
+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
@@ -4545,6 +4560,9 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
+ IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
+ IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
+ IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
IX86_ATTR_ISA ("mmx", OPT_mmmx),
IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
@@ -8983,19 +9001,24 @@ standard_sse_constant_opcode (rtx insn, rtx x)
switch (get_attr_mode (insn))
{
case MODE_XI:
- case MODE_V16SF:
return "vpxord\t%g0, %g0, %g0";
+ case MODE_V16SF:
+ return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
+ : "vpxord\t%g0, %g0, %g0";
case MODE_V8DF:
- return "vpxorq\t%g0, %g0, %g0";
+ return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
+ : "vpxorq\t%g0, %g0, %g0";
case MODE_TI:
- return "%vpxor\t%0, %d0";
+ return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
+ : "%vpxor\t%0, %d0";
case MODE_V2DF:
return "%vxorpd\t%0, %d0";
case MODE_V4SF:
return "%vxorps\t%0, %d0";
case MODE_OI:
- return "vpxor\t%x0, %x0, %x0";
+ return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
+ : "vpxor\t%x0, %x0, %x0";
case MODE_V4DF:
return "vxorpd\t%x0, %x0, %x0";
case MODE_V8SF:
@@ -9006,7 +9029,8 @@ standard_sse_constant_opcode (rtx insn, rtx x)
}
case 2:
- if (get_attr_mode (insn) == MODE_XI
+ if (TARGET_AVX512VL
+ || get_attr_mode (insn) == MODE_XI
|| get_attr_mode (insn) == MODE_V8DF
|| get_attr_mode (insn) == MODE_V16SF)
return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
@@ -14688,7 +14712,7 @@ print_reg (rtx x, int code, FILE *file)
case 8:
case 4:
case 12:
- if (! ANY_FP_REG_P (x))
+ if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
/* FALLTHRU */
case 16:
@@ -17517,8 +17541,10 @@ ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
{
case V4SFmode:
case V8SFmode:
+ case V16SFmode:
case V2DFmode:
case V4DFmode:
+ case V8DFmode:
dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
if (GET_CODE (op2) == CONST_VECTOR)
{
@@ -18825,6 +18851,19 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
enum machine_mode fltmode = GET_MODE (target);
rtx (*cvt) (rtx, rtx);
+ if (intmode == V16SImode)
+ {
+ emit_insn (gen_ufloatv16siv16sf2 (target, val));
+ return;
+ }
+ if (TARGET_AVX512VL)
+ {
+ if (intmode == V4SImode)
+ emit_insn (gen_ufloatv4siv4sf2 (target, val));
+ else
+ emit_insn (gen_ufloatv8siv8sf2 (target, val));
+ return;
+ }
if (intmode == V4SImode)
cvt = gen_floatv4siv4sf2;
else
@@ -21013,6 +21052,12 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
}
break;
+ case V64QImode:
+ gen = gen_avx512bw_blendmv64qi;
+ break;
+ case V32HImode:
+ gen = gen_avx512bw_blendmv32hi;
+ break;
case V16SImode:
gen = gen_avx512f_blendmv16si;
break;
@@ -21329,6 +21374,8 @@ ix86_expand_int_vcond (rtx operands[])
}
break;
+ case V64QImode:
+ case V32HImode:
case V32QImode:
case V16HImode:
case V16QImode:
@@ -21377,20 +21424,113 @@ ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
enum machine_mode mode = GET_MODE (op0);
switch (mode)
{
+ /* There is no byte version of vpermi2. So we use vpermi2w. */
+ case V64QImode:
+ if (!TARGET_AVX512BW)
+ return false;
+ rtx mask_lowpart, op0_lowpart, op1_lowpart;
+ rtx perm_lo, perm_hi, tmp, res_lo, tmp2, res_hi;
+
+ mask_lowpart = gen_lowpart (V32HImode, force_reg (V64QImode, mask));
+ op0_lowpart = gen_lowpart (V32HImode, op0);
+ op1_lowpart = gen_lowpart (V32HImode, op1);
+ tmp = gen_reg_rtx (V32HImode);
+ tmp2 = gen_reg_rtx (V32HImode);
+ perm_lo = gen_reg_rtx (V32HImode);
+ perm_hi = gen_reg_rtx (V32HImode);
+ res_lo = gen_reg_rtx (V32HImode);
+ res_hi = gen_reg_rtx (V32HImode);
+
+ emit_insn (gen_ashlv32hi3 (tmp, mask_lowpart, GEN_INT (8)));
+ emit_insn (gen_ashrv32hi3 (perm_lo, tmp, GEN_INT (9)));
+ emit_insn (gen_ashrv32hi3 (perm_hi, mask_lowpart, GEN_INT (9)));
+ emit_insn (gen_avx512bw_vpermi2varv32hi3 (res_lo, op0_lowpart,
+ perm_lo, op1_lowpart));
+ emit_insn (gen_avx512bw_vpermi2varv32hi3 (tmp2, op0_lowpart,
+ perm_hi, op1_lowpart));
+ emit_insn (gen_ashlv32hi3 (res_hi, tmp2, GEN_INT (8)));
+ emit_insn (gen_avx512bw_blendmv64qi (target, gen_lowpart (V64QImode, res_lo),
+ gen_lowpart (V64QImode, res_hi),
+ force_reg (DImode, GEN_INT (0xAAAAAAAAAAAAAAAALL))));
+ return true;
+ case V8HImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv8hi3 (target, op0,
+ force_reg (V8HImode, mask), op1));
+ return true;
+ case V16HImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv16hi3 (target, op0,
+ force_reg (V16HImode, mask), op1));
+ return true;
+ case V32HImode:
+ emit_insn (gen_avx512bw_vpermi2varv32hi3 (target, op0,
+ force_reg (V32HImode, mask), op1));
+ return true;
+ case V4SImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv4si3 (target, op0,
+ force_reg (V4SImode, mask), op1));
+ return true;
+ case V8SImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv8si3 (target, op0,
+ force_reg (V8SImode, mask), op1));
+ return true;
case V16SImode:
emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
force_reg (V16SImode, mask),
op1));
return true;
+ case V4SFmode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv4sf3 (target, op0,
+ force_reg (V4SImode, mask), op1));
+ return true;
+ case V8SFmode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv8sf3 (target, op0,
+ force_reg (V8SImode, mask), op1));
+ return true;
case V16SFmode:
emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
force_reg (V16SImode, mask),
op1));
return true;
+ case V2DImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv2di3 (target, op0,
+ force_reg (V2DImode, mask), op1));
+ return true;
+ case V4DImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv4di3 (target, op0,
+ force_reg (V4DImode, mask), op1));
+ return true;
case V8DImode:
emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
force_reg (V8DImode, mask), op1));
return true;
+ case V2DFmode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv2df3 (target, op0,
+ force_reg (V2DImode, mask), op1));
+ return true;
+ case V4DFmode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv4df3 (target, op0,
+ force_reg (V4DImode, mask), op1));
+ return true;
case V8DFmode:
emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
force_reg (V8DImode, mask), op1));
@@ -21792,6 +21932,15 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
switch (imode)
{
+ case V64QImode:
+ if (unsigned_p)
+ unpack = gen_avx512bw_zero_extendv32qiv32hi2;
+ else
+ unpack = gen_avx512bw_sign_extendv32qiv32hi2;
+ halfmode = V32QImode;
+ extract
+ = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
+ break;
case V32QImode:
if (unsigned_p)
unpack = gen_avx2_zero_extendv16qiv16hi2;
@@ -28482,6 +28631,999 @@ enum ix86_builtins
IX86_BUILTIN_KXOR16,
IX86_BUILTIN_KMOV16,
+ /* AVX512VL. */
+ IX86_BUILTIN_PMOVUSQD256_MEM,
+ IX86_BUILTIN_PMOVUSQD128_MEM,
+ IX86_BUILTIN_PMOVSQD256_MEM,
+ IX86_BUILTIN_PMOVSQD128_MEM,
+ IX86_BUILTIN_PMOVQD256_MEM,
+ IX86_BUILTIN_PMOVQD128_MEM,
+ IX86_BUILTIN_PMOVUSQW256_MEM,
+ IX86_BUILTIN_PMOVUSQW128_MEM,
+ IX86_BUILTIN_PMOVSQW256_MEM,
+ IX86_BUILTIN_PMOVSQW128_MEM,
+ IX86_BUILTIN_PMOVQW256_MEM,
+ IX86_BUILTIN_PMOVQW128_MEM,
+ IX86_BUILTIN_PMOVUSQB256_MEM,
+ IX86_BUILTIN_PMOVUSQB128_MEM,
+ IX86_BUILTIN_PMOVSQB256_MEM,
+ IX86_BUILTIN_PMOVSQB128_MEM,
+ IX86_BUILTIN_PMOVQB256_MEM,
+ IX86_BUILTIN_PMOVQB128_MEM,
+ IX86_BUILTIN_PMOVUSDW256_MEM,
+ IX86_BUILTIN_PMOVUSDW128_MEM,
+ IX86_BUILTIN_PMOVSDW256_MEM,
+ IX86_BUILTIN_PMOVSDW128_MEM,
+ IX86_BUILTIN_PMOVDW256_MEM,
+ IX86_BUILTIN_PMOVDW128_MEM,
+ IX86_BUILTIN_PMOVUSDB256_MEM,
+ IX86_BUILTIN_PMOVUSDB128_MEM,
+ IX86_BUILTIN_PMOVSDB256_MEM,
+ IX86_BUILTIN_PMOVSDB128_MEM,
+ IX86_BUILTIN_PMOVDB256_MEM,
+ IX86_BUILTIN_PMOVDB128_MEM,
+ IX86_BUILTIN_MOVDQA64LOAD256_MASK,
+ IX86_BUILTIN_MOVDQA64LOAD128_MASK,
+ IX86_BUILTIN_MOVDQA32LOAD256_MASK,
+ IX86_BUILTIN_MOVDQA32LOAD128_MASK,
+ IX86_BUILTIN_MOVDQA64STORE256_MASK,
+ IX86_BUILTIN_MOVDQA64STORE128_MASK,
+ IX86_BUILTIN_MOVDQA32STORE256_MASK,
+ IX86_BUILTIN_MOVDQA32STORE128_MASK,
+ IX86_BUILTIN_LOADAPD256_MASK,
+ IX86_BUILTIN_LOADAPD128_MASK,
+ IX86_BUILTIN_LOADAPS256_MASK,
+ IX86_BUILTIN_LOADAPS128_MASK,
+ IX86_BUILTIN_STOREAPD256_MASK,
+ IX86_BUILTIN_STOREAPD128_MASK,
+ IX86_BUILTIN_STOREAPS256_MASK,
+ IX86_BUILTIN_STOREAPS128_MASK,
+ IX86_BUILTIN_LOADUPD256_MASK,
+ IX86_BUILTIN_LOADUPD128_MASK,
+ IX86_BUILTIN_LOADUPS256_MASK,
+ IX86_BUILTIN_LOADUPS128_MASK,
+ IX86_BUILTIN_STOREUPD256_MASK,
+ IX86_BUILTIN_STOREUPD128_MASK,
+ IX86_BUILTIN_STOREUPS256_MASK,
+ IX86_BUILTIN_STOREUPS128_MASK,
+ IX86_BUILTIN_LOADDQUDI256_MASK,
+ IX86_BUILTIN_LOADDQUDI128_MASK,
+ IX86_BUILTIN_LOADDQUSI256_MASK,
+ IX86_BUILTIN_LOADDQUSI128_MASK,
+ IX86_BUILTIN_LOADDQUHI256_MASK,
+ IX86_BUILTIN_LOADDQUHI128_MASK,
+ IX86_BUILTIN_LOADDQUQI256_MASK,
+ IX86_BUILTIN_LOADDQUQI128_MASK,
+ IX86_BUILTIN_STOREDQUDI256_MASK,
+ IX86_BUILTIN_STOREDQUDI128_MASK,
+ IX86_BUILTIN_STOREDQUSI256_MASK,
+ IX86_BUILTIN_STOREDQUSI128_MASK,
+ IX86_BUILTIN_STOREDQUHI256_MASK,
+ IX86_BUILTIN_STOREDQUHI128_MASK,
+ IX86_BUILTIN_STOREDQUQI256_MASK,
+ IX86_BUILTIN_STOREDQUQI128_MASK,
+ IX86_BUILTIN_COMPRESSPDSTORE256,
+ IX86_BUILTIN_COMPRESSPDSTORE128,
+ IX86_BUILTIN_COMPRESSPSSTORE256,
+ IX86_BUILTIN_COMPRESSPSSTORE128,
+ IX86_BUILTIN_PCOMPRESSQSTORE256,
+ IX86_BUILTIN_PCOMPRESSQSTORE128,
+ IX86_BUILTIN_PCOMPRESSDSTORE256,
+ IX86_BUILTIN_PCOMPRESSDSTORE128,
+ IX86_BUILTIN_EXPANDPDLOAD256,
+ IX86_BUILTIN_EXPANDPDLOAD128,
+ IX86_BUILTIN_EXPANDPSLOAD256,
+ IX86_BUILTIN_EXPANDPSLOAD128,
+ IX86_BUILTIN_PEXPANDQLOAD256,
+ IX86_BUILTIN_PEXPANDQLOAD128,
+ IX86_BUILTIN_PEXPANDDLOAD256,
+ IX86_BUILTIN_PEXPANDDLOAD128,
+ IX86_BUILTIN_EXPANDPDLOAD256Z,
+ IX86_BUILTIN_EXPANDPDLOAD128Z,
+ IX86_BUILTIN_EXPANDPSLOAD256Z,
+ IX86_BUILTIN_EXPANDPSLOAD128Z,
+ IX86_BUILTIN_PEXPANDQLOAD256Z,
+ IX86_BUILTIN_PEXPANDQLOAD128Z,
+ IX86_BUILTIN_PEXPANDDLOAD256Z,
+ IX86_BUILTIN_PEXPANDDLOAD128Z,
+ IX86_BUILTIN_PALIGNR256_MASK,
+ IX86_BUILTIN_PALIGNR128_MASK,
+ IX86_BUILTIN_MOVDQA64_256_MASK,
+ IX86_BUILTIN_MOVDQA64_128_MASK,
+ IX86_BUILTIN_MOVDQA32_256_MASK,
+ IX86_BUILTIN_MOVDQA32_128_MASK,
+ IX86_BUILTIN_MOVAPD256_MASK,
+ IX86_BUILTIN_MOVAPD128_MASK,
+ IX86_BUILTIN_MOVAPS256_MASK,
+ IX86_BUILTIN_MOVAPS128_MASK,
+ IX86_BUILTIN_MOVDQUHI256_MASK,
+ IX86_BUILTIN_MOVDQUHI128_MASK,
+ IX86_BUILTIN_MOVDQUQI256_MASK,
+ IX86_BUILTIN_MOVDQUQI128_MASK,
+ IX86_BUILTIN_MINPS128_MASK,
+ IX86_BUILTIN_MAXPS128_MASK,
+ IX86_BUILTIN_MINPD128_MASK,
+ IX86_BUILTIN_MAXPD128_MASK,
+ IX86_BUILTIN_MAXPD256_MASK,
+ IX86_BUILTIN_MAXPS256_MASK,
+ IX86_BUILTIN_MINPD256_MASK,
+ IX86_BUILTIN_MINPS256_MASK,
+ IX86_BUILTIN_MULPS128_MASK,
+ IX86_BUILTIN_DIVPS128_MASK,
+ IX86_BUILTIN_MULPD128_MASK,
+ IX86_BUILTIN_DIVPD128_MASK,
+ IX86_BUILTIN_DIVPD256_MASK,
+ IX86_BUILTIN_DIVPS256_MASK,
+ IX86_BUILTIN_MULPD256_MASK,
+ IX86_BUILTIN_MULPS256_MASK,
+ IX86_BUILTIN_ADDPD128_MASK,
+ IX86_BUILTIN_ADDPD256_MASK,
+ IX86_BUILTIN_ADDPS128_MASK,
+ IX86_BUILTIN_ADDPS256_MASK,
+ IX86_BUILTIN_SUBPD128_MASK,
+ IX86_BUILTIN_SUBPD256_MASK,
+ IX86_BUILTIN_SUBPS128_MASK,
+ IX86_BUILTIN_SUBPS256_MASK,
+ IX86_BUILTIN_XORPD256_MASK,
+ IX86_BUILTIN_XORPD128_MASK,
+ IX86_BUILTIN_XORPS256_MASK,
+ IX86_BUILTIN_XORPS128_MASK,
+ IX86_BUILTIN_ORPD256_MASK,
+ IX86_BUILTIN_ORPD128_MASK,
+ IX86_BUILTIN_ORPS256_MASK,
+ IX86_BUILTIN_ORPS128_MASK,
+ IX86_BUILTIN_BROADCASTF32x2_256,
+ IX86_BUILTIN_BROADCASTI32x2_256,
+ IX86_BUILTIN_BROADCASTI32x2_128,
+ IX86_BUILTIN_BROADCASTF64X2_256,
+ IX86_BUILTIN_BROADCASTI64X2_256,
+ IX86_BUILTIN_BROADCASTF32X4_256,
+ IX86_BUILTIN_BROADCASTI32X4_256,
+ IX86_BUILTIN_EXTRACTF32X4_256,
+ IX86_BUILTIN_EXTRACTI32X4_256,
+ IX86_BUILTIN_DBPSADBW256,
+ IX86_BUILTIN_DBPSADBW128,
+ IX86_BUILTIN_CVTTPD2QQ256,
+ IX86_BUILTIN_CVTTPD2QQ128,
+ IX86_BUILTIN_CVTTPD2UQQ256,
+ IX86_BUILTIN_CVTTPD2UQQ128,
+ IX86_BUILTIN_CVTPD2QQ256,
+ IX86_BUILTIN_CVTPD2QQ128,
+ IX86_BUILTIN_CVTPD2UQQ256,
+ IX86_BUILTIN_CVTPD2UQQ128,
+ IX86_BUILTIN_CVTPD2UDQ256_MASK,
+ IX86_BUILTIN_CVTPD2UDQ128_MASK,
+ IX86_BUILTIN_CVTTPS2QQ256,
+ IX86_BUILTIN_CVTTPS2QQ128,
+ IX86_BUILTIN_CVTTPS2UQQ256,
+ IX86_BUILTIN_CVTTPS2UQQ128,
+ IX86_BUILTIN_CVTTPS2DQ256_MASK,
+ IX86_BUILTIN_CVTTPS2DQ128_MASK,
+ IX86_BUILTIN_CVTTPS2UDQ256,
+ IX86_BUILTIN_CVTTPS2UDQ128,
+ IX86_BUILTIN_CVTTPD2DQ256_MASK,
+ IX86_BUILTIN_CVTTPD2DQ128_MASK,
+ IX86_BUILTIN_CVTTPD2UDQ256_MASK,
+ IX86_BUILTIN_CVTTPD2UDQ128_MASK,
+ IX86_BUILTIN_CVTPD2DQ256_MASK,
+ IX86_BUILTIN_CVTPD2DQ128_MASK,
+ IX86_BUILTIN_CVTDQ2PD256_MASK,
+ IX86_BUILTIN_CVTDQ2PD128_MASK,
+ IX86_BUILTIN_CVTUDQ2PD256_MASK,
+ IX86_BUILTIN_CVTUDQ2PD128_MASK,
+ IX86_BUILTIN_CVTDQ2PS256_MASK,
+ IX86_BUILTIN_CVTDQ2PS128_MASK,
+ IX86_BUILTIN_CVTUDQ2PS256_MASK,
+ IX86_BUILTIN_CVTUDQ2PS128_MASK,
+ IX86_BUILTIN_CVTPS2PD256_MASK,
+ IX86_BUILTIN_CVTPS2PD128_MASK,
+ IX86_BUILTIN_PBROADCASTB256_MASK,
+ IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTB128_MASK,
+ IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTW256_MASK,
+ IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTW128_MASK,
+ IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTD256_MASK,
+ IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTD128_MASK,
+ IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTQ256_MASK,
+ IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
+ IX86_BUILTIN_PBROADCASTQ128_MASK,
+ IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
+ IX86_BUILTIN_BROADCASTSS256,
+ IX86_BUILTIN_BROADCASTSS128,
+ IX86_BUILTIN_BROADCASTSD256,
+ IX86_BUILTIN_EXTRACTF64X2_256,
+ IX86_BUILTIN_EXTRACTI64X2_256,
+ IX86_BUILTIN_INSERTF32X4_256,
+ IX86_BUILTIN_INSERTI32X4_256,
+ IX86_BUILTIN_PMOVSXBW256_MASK,
+ IX86_BUILTIN_PMOVSXBW128_MASK,
+ IX86_BUILTIN_PMOVSXBD256_MASK,
+ IX86_BUILTIN_PMOVSXBD128_MASK,
+ IX86_BUILTIN_PMOVSXBQ256_MASK,
+ IX86_BUILTIN_PMOVSXBQ128_MASK,
+ IX86_BUILTIN_PMOVSXWD256_MASK,
+ IX86_BUILTIN_PMOVSXWD128_MASK,
+ IX86_BUILTIN_PMOVSXWQ256_MASK,
+ IX86_BUILTIN_PMOVSXWQ128_MASK,
+ IX86_BUILTIN_PMOVSXDQ256_MASK,
+ IX86_BUILTIN_PMOVSXDQ128_MASK,
+ IX86_BUILTIN_PMOVZXBW256_MASK,
+ IX86_BUILTIN_PMOVZXBW128_MASK,
+ IX86_BUILTIN_PMOVZXBD256_MASK,
+ IX86_BUILTIN_PMOVZXBD128_MASK,
+ IX86_BUILTIN_PMOVZXBQ256_MASK,
+ IX86_BUILTIN_PMOVZXBQ128_MASK,
+ IX86_BUILTIN_PMOVZXWD256_MASK,
+ IX86_BUILTIN_PMOVZXWD128_MASK,
+ IX86_BUILTIN_PMOVZXWQ256_MASK,
+ IX86_BUILTIN_PMOVZXWQ128_MASK,
+ IX86_BUILTIN_PMOVZXDQ256_MASK,
+ IX86_BUILTIN_PMOVZXDQ128_MASK,
+ IX86_BUILTIN_REDUCEPD256_MASK,
+ IX86_BUILTIN_REDUCEPD128_MASK,
+ IX86_BUILTIN_REDUCEPS256_MASK,
+ IX86_BUILTIN_REDUCEPS128_MASK,
+ IX86_BUILTIN_REDUCESD_MASK,
+ IX86_BUILTIN_REDUCESS_MASK,
+ IX86_BUILTIN_VPERMVARHI256_MASK,
+ IX86_BUILTIN_VPERMVARHI128_MASK,
+ IX86_BUILTIN_VPERMT2VARHI256,
+ IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
+ IX86_BUILTIN_VPERMT2VARHI128,
+ IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
+ IX86_BUILTIN_VPERMI2VARHI256,
+ IX86_BUILTIN_VPERMI2VARHI128,
+ IX86_BUILTIN_RCP14PD256,
+ IX86_BUILTIN_RCP14PD128,
+ IX86_BUILTIN_RCP14PS256,
+ IX86_BUILTIN_RCP14PS128,
+ IX86_BUILTIN_RSQRT14PD256_MASK,
+ IX86_BUILTIN_RSQRT14PD128_MASK,
+ IX86_BUILTIN_RSQRT14PS256_MASK,
+ IX86_BUILTIN_RSQRT14PS128_MASK,
+ IX86_BUILTIN_SQRTPD256_MASK,
+ IX86_BUILTIN_SQRTPD128_MASK,
+ IX86_BUILTIN_SQRTPS256_MASK,
+ IX86_BUILTIN_SQRTPS128_MASK,
+ IX86_BUILTIN_PADDB128_MASK,
+ IX86_BUILTIN_PADDW128_MASK,
+ IX86_BUILTIN_PADDD128_MASK,
+ IX86_BUILTIN_PADDQ128_MASK,
+ IX86_BUILTIN_PSUBB128_MASK,
+ IX86_BUILTIN_PSUBW128_MASK,
+ IX86_BUILTIN_PSUBD128_MASK,
+ IX86_BUILTIN_PSUBQ128_MASK,
+ IX86_BUILTIN_PADDSB128_MASK,
+ IX86_BUILTIN_PADDSW128_MASK,
+ IX86_BUILTIN_PSUBSB128_MASK,
+ IX86_BUILTIN_PSUBSW128_MASK,
+ IX86_BUILTIN_PADDUSB128_MASK,
+ IX86_BUILTIN_PADDUSW128_MASK,
+ IX86_BUILTIN_PSUBUSB128_MASK,
+ IX86_BUILTIN_PSUBUSW128_MASK,
+ IX86_BUILTIN_PADDB256_MASK,
+ IX86_BUILTIN_PADDW256_MASK,
+ IX86_BUILTIN_PADDD256_MASK,
+ IX86_BUILTIN_PADDQ256_MASK,
+ IX86_BUILTIN_PADDSB256_MASK,
+ IX86_BUILTIN_PADDSW256_MASK,
+ IX86_BUILTIN_PADDUSB256_MASK,
+ IX86_BUILTIN_PADDUSW256_MASK,
+ IX86_BUILTIN_PSUBB256_MASK,
+ IX86_BUILTIN_PSUBW256_MASK,
+ IX86_BUILTIN_PSUBD256_MASK,
+ IX86_BUILTIN_PSUBQ256_MASK,
+ IX86_BUILTIN_PSUBSB256_MASK,
+ IX86_BUILTIN_PSUBSW256_MASK,
+ IX86_BUILTIN_PSUBUSB256_MASK,
+ IX86_BUILTIN_PSUBUSW256_MASK,
+ IX86_BUILTIN_SHUF_F64x2_256,
+ IX86_BUILTIN_SHUF_I64x2_256,
+ IX86_BUILTIN_SHUF_I32x4_256,
+ IX86_BUILTIN_SHUF_F32x4_256,
+ IX86_BUILTIN_PMOVWB128,
+ IX86_BUILTIN_PMOVWB256,
+ IX86_BUILTIN_PMOVSWB128,
+ IX86_BUILTIN_PMOVSWB256,
+ IX86_BUILTIN_PMOVUSWB128,
+ IX86_BUILTIN_PMOVUSWB256,
+ IX86_BUILTIN_PMOVDB128,
+ IX86_BUILTIN_PMOVDB256,
+ IX86_BUILTIN_PMOVSDB128,
+ IX86_BUILTIN_PMOVSDB256,
+ IX86_BUILTIN_PMOVUSDB128,
+ IX86_BUILTIN_PMOVUSDB256,
+ IX86_BUILTIN_PMOVDW128,
+ IX86_BUILTIN_PMOVDW256,
+ IX86_BUILTIN_PMOVSDW128,
+ IX86_BUILTIN_PMOVSDW256,
+ IX86_BUILTIN_PMOVUSDW128,
+ IX86_BUILTIN_PMOVUSDW256,
+ IX86_BUILTIN_PMOVQB128,
+ IX86_BUILTIN_PMOVQB256,
+ IX86_BUILTIN_PMOVSQB128,
+ IX86_BUILTIN_PMOVSQB256,
+ IX86_BUILTIN_PMOVUSQB128,
+ IX86_BUILTIN_PMOVUSQB256,
+ IX86_BUILTIN_PMOVQW128,
+ IX86_BUILTIN_PMOVQW256,
+ IX86_BUILTIN_PMOVSQW128,
+ IX86_BUILTIN_PMOVSQW256,
+ IX86_BUILTIN_PMOVUSQW128,
+ IX86_BUILTIN_PMOVUSQW256,
+ IX86_BUILTIN_PMOVQD128,
+ IX86_BUILTIN_PMOVQD256,
+ IX86_BUILTIN_PMOVSQD128,
+ IX86_BUILTIN_PMOVSQD256,
+ IX86_BUILTIN_PMOVUSQD128,
+ IX86_BUILTIN_PMOVUSQD256,
+ IX86_BUILTIN_RANGEPD256,
+ IX86_BUILTIN_RANGEPD128,
+ IX86_BUILTIN_RANGEPS256,
+ IX86_BUILTIN_RANGEPS128,
+ IX86_BUILTIN_GETEXPPS256,
+ IX86_BUILTIN_GETEXPPD256,
+ IX86_BUILTIN_GETEXPPS128,
+ IX86_BUILTIN_GETEXPPD128,
+ IX86_BUILTIN_FIXUPIMMPD256,
+ IX86_BUILTIN_FIXUPIMMPD256_MASK,
+ IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
+ IX86_BUILTIN_FIXUPIMMPS256,
+ IX86_BUILTIN_FIXUPIMMPS256_MASK,
+ IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
+ IX86_BUILTIN_FIXUPIMMPD128,
+ IX86_BUILTIN_FIXUPIMMPD128_MASK,
+ IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
+ IX86_BUILTIN_FIXUPIMMPS128,
+ IX86_BUILTIN_FIXUPIMMPS128_MASK,
+ IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
+ IX86_BUILTIN_PABSQ256,
+ IX86_BUILTIN_PABSQ128,
+ IX86_BUILTIN_PABSD256_MASK,
+ IX86_BUILTIN_PABSD128_MASK,
+ IX86_BUILTIN_PMULHRSW256_MASK,
+ IX86_BUILTIN_PMULHRSW128_MASK,
+ IX86_BUILTIN_PMULHUW128_MASK,
+ IX86_BUILTIN_PMULHUW256_MASK,
+ IX86_BUILTIN_PMULHW256_MASK,
+ IX86_BUILTIN_PMULHW128_MASK,
+ IX86_BUILTIN_PMULLW256_MASK,
+ IX86_BUILTIN_PMULLW128_MASK,
+ IX86_BUILTIN_PMULLQ256,
+ IX86_BUILTIN_PMULLQ128,
+ IX86_BUILTIN_ANDPD256_MASK,
+ IX86_BUILTIN_ANDPD128_MASK,
+ IX86_BUILTIN_ANDPS256_MASK,
+ IX86_BUILTIN_ANDPS128_MASK,
+ IX86_BUILTIN_ANDNPD256_MASK,
+ IX86_BUILTIN_ANDNPD128_MASK,
+ IX86_BUILTIN_ANDNPS256_MASK,
+ IX86_BUILTIN_ANDNPS128_MASK,
+ IX86_BUILTIN_PSLLWI128_MASK,
+ IX86_BUILTIN_PSLLDI128_MASK,
+ IX86_BUILTIN_PSLLQI128_MASK,
+ IX86_BUILTIN_PSLLW128_MASK,
+ IX86_BUILTIN_PSLLD128_MASK,
+ IX86_BUILTIN_PSLLQ128_MASK,
+ IX86_BUILTIN_PSLLWI256_MASK,
+ IX86_BUILTIN_PSLLW256_MASK,
+ IX86_BUILTIN_PSLLDI256_MASK,
+ IX86_BUILTIN_PSLLD256_MASK,
+ IX86_BUILTIN_PSLLQI256_MASK,
+ IX86_BUILTIN_PSLLQ256_MASK,
+ IX86_BUILTIN_PSRADI128_MASK,
+ IX86_BUILTIN_PSRAD128_MASK,
+ IX86_BUILTIN_PSRADI256_MASK,
+ IX86_BUILTIN_PSRAD256_MASK,
+ IX86_BUILTIN_PSRAQI128_MASK,
+ IX86_BUILTIN_PSRAQ128_MASK,
+ IX86_BUILTIN_PSRAQI256_MASK,
+ IX86_BUILTIN_PSRAQ256_MASK,
+ IX86_BUILTIN_PANDD256,
+ IX86_BUILTIN_PANDD128,
+ IX86_BUILTIN_PSRLDI128_MASK,
+ IX86_BUILTIN_PSRLD128_MASK,
+ IX86_BUILTIN_PSRLDI256_MASK,
+ IX86_BUILTIN_PSRLD256_MASK,
+ IX86_BUILTIN_PSRLQI128_MASK,
+ IX86_BUILTIN_PSRLQ128_MASK,
+ IX86_BUILTIN_PSRLQI256_MASK,
+ IX86_BUILTIN_PSRLQ256_MASK,
+ IX86_BUILTIN_PANDQ256,
+ IX86_BUILTIN_PANDQ128,
+ IX86_BUILTIN_PANDND256,
+ IX86_BUILTIN_PANDND128,
+ IX86_BUILTIN_PANDNQ256,
+ IX86_BUILTIN_PANDNQ128,
+ IX86_BUILTIN_PORD256,
+ IX86_BUILTIN_PORD128,
+ IX86_BUILTIN_PORQ256,
+ IX86_BUILTIN_PORQ128,
+ IX86_BUILTIN_PXORD256,
+ IX86_BUILTIN_PXORD128,
+ IX86_BUILTIN_PXORQ256,
+ IX86_BUILTIN_PXORQ128,
+ IX86_BUILTIN_PACKSSWB256_MASK,
+ IX86_BUILTIN_PACKSSWB128_MASK,
+ IX86_BUILTIN_PACKUSWB256_MASK,
+ IX86_BUILTIN_PACKUSWB128_MASK,
+ IX86_BUILTIN_RNDSCALEPS256,
+ IX86_BUILTIN_RNDSCALEPD256,
+ IX86_BUILTIN_RNDSCALEPS128,
+ IX86_BUILTIN_RNDSCALEPD128,
+ IX86_BUILTIN_VTERNLOGQ256_MASK,
+ IX86_BUILTIN_VTERNLOGQ256_MASKZ,
+ IX86_BUILTIN_VTERNLOGD256_MASK,
+ IX86_BUILTIN_VTERNLOGD256_MASKZ,
+ IX86_BUILTIN_VTERNLOGQ128_MASK,
+ IX86_BUILTIN_VTERNLOGQ128_MASKZ,
+ IX86_BUILTIN_VTERNLOGD128_MASK,
+ IX86_BUILTIN_VTERNLOGD128_MASKZ,
+ IX86_BUILTIN_SCALEFPD256,
+ IX86_BUILTIN_SCALEFPS256,
+ IX86_BUILTIN_SCALEFPD128,
+ IX86_BUILTIN_SCALEFPS128,
+ IX86_BUILTIN_VFMADDPD256_MASK,
+ IX86_BUILTIN_VFMADDPD256_MASK3,
+ IX86_BUILTIN_VFMADDPD256_MASKZ,
+ IX86_BUILTIN_VFMADDPD128_MASK,
+ IX86_BUILTIN_VFMADDPD128_MASK3,
+ IX86_BUILTIN_VFMADDPD128_MASKZ,
+ IX86_BUILTIN_VFMADDPS256_MASK,
+ IX86_BUILTIN_VFMADDPS256_MASK3,
+ IX86_BUILTIN_VFMADDPS256_MASKZ,
+ IX86_BUILTIN_VFMADDPS128_MASK,
+ IX86_BUILTIN_VFMADDPS128_MASK3,
+ IX86_BUILTIN_VFMADDPS128_MASKZ,
+ IX86_BUILTIN_VFMSUBPD256_MASK3,
+ IX86_BUILTIN_VFMSUBPD128_MASK3,
+ IX86_BUILTIN_VFMSUBPS256_MASK3,
+ IX86_BUILTIN_VFMSUBPS128_MASK3,
+ IX86_BUILTIN_VFNMADDPD256_MASK,
+ IX86_BUILTIN_VFNMADDPD128_MASK,
+ IX86_BUILTIN_VFNMADDPS256_MASK,
+ IX86_BUILTIN_VFNMADDPS128_MASK,
+ IX86_BUILTIN_VFNMSUBPD256_MASK,
+ IX86_BUILTIN_VFNMSUBPD256_MASK3,
+ IX86_BUILTIN_VFNMSUBPD128_MASK,
+ IX86_BUILTIN_VFNMSUBPD128_MASK3,
+ IX86_BUILTIN_VFNMSUBPS256_MASK,
+ IX86_BUILTIN_VFNMSUBPS256_MASK3,
+ IX86_BUILTIN_VFNMSUBPS128_MASK,
+ IX86_BUILTIN_VFNMSUBPS128_MASK3,
+ IX86_BUILTIN_VFMADDSUBPD256_MASK,
+ IX86_BUILTIN_VFMADDSUBPD256_MASK3,
+ IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
+ IX86_BUILTIN_VFMADDSUBPD128_MASK,
+ IX86_BUILTIN_VFMADDSUBPD128_MASK3,
+ IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
+ IX86_BUILTIN_VFMADDSUBPS256_MASK,
+ IX86_BUILTIN_VFMADDSUBPS256_MASK3,
+ IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
+ IX86_BUILTIN_VFMADDSUBPS128_MASK,
+ IX86_BUILTIN_VFMADDSUBPS128_MASK3,
+ IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
+ IX86_BUILTIN_VFMSUBADDPD256_MASK3,
+ IX86_BUILTIN_VFMSUBADDPD128_MASK3,
+ IX86_BUILTIN_VFMSUBADDPS256_MASK3,
+ IX86_BUILTIN_VFMSUBADDPS128_MASK3,
+ IX86_BUILTIN_INSERTF64X2_256,
+ IX86_BUILTIN_INSERTI64X2_256,
+ IX86_BUILTIN_PSRAVV16HI,
+ IX86_BUILTIN_PSRAVV8HI,
+ IX86_BUILTIN_PMADDUBSW256_MASK,
+ IX86_BUILTIN_PMADDUBSW128_MASK,
+ IX86_BUILTIN_PMADDWD256_MASK,
+ IX86_BUILTIN_PMADDWD128_MASK,
+ IX86_BUILTIN_PSRLVV16HI,
+ IX86_BUILTIN_PSRLVV8HI,
+ IX86_BUILTIN_CVTPS2DQ256_MASK,
+ IX86_BUILTIN_CVTPS2DQ128_MASK,
+ IX86_BUILTIN_CVTPS2UDQ256,
+ IX86_BUILTIN_CVTPS2UDQ128,
+ IX86_BUILTIN_CVTPS2QQ256,
+ IX86_BUILTIN_CVTPS2QQ128,
+ IX86_BUILTIN_CVTPS2UQQ256,
+ IX86_BUILTIN_CVTPS2UQQ128,
+ IX86_BUILTIN_GETMANTPS256,
+ IX86_BUILTIN_GETMANTPS128,
+ IX86_BUILTIN_GETMANTPD256,
+ IX86_BUILTIN_GETMANTPD128,
+ IX86_BUILTIN_MOVDDUP256_MASK,
+ IX86_BUILTIN_MOVDDUP128_MASK,
+ IX86_BUILTIN_MOVSHDUP256_MASK,
+ IX86_BUILTIN_MOVSHDUP128_MASK,
+ IX86_BUILTIN_MOVSLDUP256_MASK,
+ IX86_BUILTIN_MOVSLDUP128_MASK,
+ IX86_BUILTIN_CVTQQ2PS256,
+ IX86_BUILTIN_CVTQQ2PS128,
+ IX86_BUILTIN_CVTUQQ2PS256,
+ IX86_BUILTIN_CVTUQQ2PS128,
+ IX86_BUILTIN_CVTQQ2PD256,
+ IX86_BUILTIN_CVTQQ2PD128,
+ IX86_BUILTIN_CVTUQQ2PD256,
+ IX86_BUILTIN_CVTUQQ2PD128,
+ IX86_BUILTIN_VPERMT2VARQ256,
+ IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
+ IX86_BUILTIN_VPERMT2VARD256,
+ IX86_BUILTIN_VPERMT2VARD256_MASKZ,
+ IX86_BUILTIN_VPERMI2VARQ256,
+ IX86_BUILTIN_VPERMI2VARD256,
+ IX86_BUILTIN_VPERMT2VARPD256,
+ IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
+ IX86_BUILTIN_VPERMT2VARPS256,
+ IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
+ IX86_BUILTIN_VPERMI2VARPD256,
+ IX86_BUILTIN_VPERMI2VARPS256,
+ IX86_BUILTIN_VPERMT2VARQ128,
+ IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
+ IX86_BUILTIN_VPERMT2VARD128,
+ IX86_BUILTIN_VPERMT2VARD128_MASKZ,
+ IX86_BUILTIN_VPERMI2VARQ128,
+ IX86_BUILTIN_VPERMI2VARD128,
+ IX86_BUILTIN_VPERMT2VARPD128,
+ IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
+ IX86_BUILTIN_VPERMT2VARPS128,
+ IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
+ IX86_BUILTIN_VPERMI2VARPD128,
+ IX86_BUILTIN_VPERMI2VARPS128,
+ IX86_BUILTIN_PSHUFB256_MASK,
+ IX86_BUILTIN_PSHUFB128_MASK,
+ IX86_BUILTIN_PSHUFHW256_MASK,
+ IX86_BUILTIN_PSHUFHW128_MASK,
+ IX86_BUILTIN_PSHUFLW256_MASK,
+ IX86_BUILTIN_PSHUFLW128_MASK,
+ IX86_BUILTIN_PSHUFD256_MASK,
+ IX86_BUILTIN_PSHUFD128_MASK,
+ IX86_BUILTIN_SHUFPD256_MASK,
+ IX86_BUILTIN_SHUFPD128_MASK,
+ IX86_BUILTIN_SHUFPS256_MASK,
+ IX86_BUILTIN_SHUFPS128_MASK,
+ IX86_BUILTIN_PROLVQ256,
+ IX86_BUILTIN_PROLVQ128,
+ IX86_BUILTIN_PROLQ256,
+ IX86_BUILTIN_PROLQ128,
+ IX86_BUILTIN_PRORVQ256,
+ IX86_BUILTIN_PRORVQ128,
+ IX86_BUILTIN_PRORQ256,
+ IX86_BUILTIN_PRORQ128,
+ IX86_BUILTIN_PSRAVQ128,
+ IX86_BUILTIN_PSRAVQ256,
+ IX86_BUILTIN_PSLLVV4DI_MASK,
+ IX86_BUILTIN_PSLLVV2DI_MASK,
+ IX86_BUILTIN_PSLLVV8SI_MASK,
+ IX86_BUILTIN_PSLLVV4SI_MASK,
+ IX86_BUILTIN_PSRAVV8SI_MASK,
+ IX86_BUILTIN_PSRAVV4SI_MASK,
+ IX86_BUILTIN_PSRLVV4DI_MASK,
+ IX86_BUILTIN_PSRLVV2DI_MASK,
+ IX86_BUILTIN_PSRLVV8SI_MASK,
+ IX86_BUILTIN_PSRLVV4SI_MASK,
+ IX86_BUILTIN_PSRAWI256_MASK,
+ IX86_BUILTIN_PSRAW256_MASK,
+ IX86_BUILTIN_PSRAWI128_MASK,
+ IX86_BUILTIN_PSRAW128_MASK,
+ IX86_BUILTIN_PSRLWI256_MASK,
+ IX86_BUILTIN_PSRLW256_MASK,
+ IX86_BUILTIN_PSRLWI128_MASK,
+ IX86_BUILTIN_PSRLW128_MASK,
+ IX86_BUILTIN_PRORVD256,
+ IX86_BUILTIN_PROLVD256,
+ IX86_BUILTIN_PRORD256,
+ IX86_BUILTIN_PROLD256,
+ IX86_BUILTIN_PRORVD128,
+ IX86_BUILTIN_PROLVD128,
+ IX86_BUILTIN_PRORD128,
+ IX86_BUILTIN_PROLD128,
+ IX86_BUILTIN_FPCLASSPD256,
+ IX86_BUILTIN_FPCLASSPD128,
+ IX86_BUILTIN_FPCLASSSD,
+ IX86_BUILTIN_FPCLASSPS256,
+ IX86_BUILTIN_FPCLASSPS128,
+ IX86_BUILTIN_FPCLASSSS,
+ IX86_BUILTIN_CVTB2MASK128,
+ IX86_BUILTIN_CVTB2MASK256,
+ IX86_BUILTIN_CVTW2MASK128,
+ IX86_BUILTIN_CVTW2MASK256,
+ IX86_BUILTIN_CVTD2MASK128,
+ IX86_BUILTIN_CVTD2MASK256,
+ IX86_BUILTIN_CVTQ2MASK128,
+ IX86_BUILTIN_CVTQ2MASK256,
+ IX86_BUILTIN_CVTMASK2B128,
+ IX86_BUILTIN_CVTMASK2B256,
+ IX86_BUILTIN_CVTMASK2W128,
+ IX86_BUILTIN_CVTMASK2W256,
+ IX86_BUILTIN_CVTMASK2D128,
+ IX86_BUILTIN_CVTMASK2D256,
+ IX86_BUILTIN_CVTMASK2Q128,
+ IX86_BUILTIN_CVTMASK2Q256,
+ IX86_BUILTIN_PCMPEQB128_MASK,
+ IX86_BUILTIN_PCMPEQB256_MASK,
+ IX86_BUILTIN_PCMPEQW128_MASK,
+ IX86_BUILTIN_PCMPEQW256_MASK,
+ IX86_BUILTIN_PCMPEQD128_MASK,
+ IX86_BUILTIN_PCMPEQD256_MASK,
+ IX86_BUILTIN_PCMPEQQ128_MASK,
+ IX86_BUILTIN_PCMPEQQ256_MASK,
+ IX86_BUILTIN_PCMPGTB128_MASK,
+ IX86_BUILTIN_PCMPGTB256_MASK,
+ IX86_BUILTIN_PCMPGTW128_MASK,
+ IX86_BUILTIN_PCMPGTW256_MASK,
+ IX86_BUILTIN_PCMPGTD128_MASK,
+ IX86_BUILTIN_PCMPGTD256_MASK,
+ IX86_BUILTIN_PCMPGTQ128_MASK,
+ IX86_BUILTIN_PCMPGTQ256_MASK,
+ IX86_BUILTIN_PTESTMB128,
+ IX86_BUILTIN_PTESTMB256,
+ IX86_BUILTIN_PTESTMW128,
+ IX86_BUILTIN_PTESTMW256,
+ IX86_BUILTIN_PTESTMD128,
+ IX86_BUILTIN_PTESTMD256,
+ IX86_BUILTIN_PTESTMQ128,
+ IX86_BUILTIN_PTESTMQ256,
+ IX86_BUILTIN_PTESTNMB128,
+ IX86_BUILTIN_PTESTNMB256,
+ IX86_BUILTIN_PTESTNMW128,
+ IX86_BUILTIN_PTESTNMW256,
+ IX86_BUILTIN_PTESTNMD128,
+ IX86_BUILTIN_PTESTNMD256,
+ IX86_BUILTIN_PTESTNMQ128,
+ IX86_BUILTIN_PTESTNMQ256,
+ IX86_BUILTIN_PBROADCASTMB128,
+ IX86_BUILTIN_PBROADCASTMB256,
+ IX86_BUILTIN_PBROADCASTMW128,
+ IX86_BUILTIN_PBROADCASTMW256,
+ IX86_BUILTIN_COMPRESSPD256,
+ IX86_BUILTIN_COMPRESSPD128,
+ IX86_BUILTIN_COMPRESSPS256,
+ IX86_BUILTIN_COMPRESSPS128,
+ IX86_BUILTIN_PCOMPRESSQ256,
+ IX86_BUILTIN_PCOMPRESSQ128,
+ IX86_BUILTIN_PCOMPRESSD256,
+ IX86_BUILTIN_PCOMPRESSD128,
+ IX86_BUILTIN_EXPANDPD256,
+ IX86_BUILTIN_EXPANDPD128,
+ IX86_BUILTIN_EXPANDPS256,
+ IX86_BUILTIN_EXPANDPS128,
+ IX86_BUILTIN_PEXPANDQ256,
+ IX86_BUILTIN_PEXPANDQ128,
+ IX86_BUILTIN_PEXPANDD256,
+ IX86_BUILTIN_PEXPANDD128,
+ IX86_BUILTIN_EXPANDPD256Z,
+ IX86_BUILTIN_EXPANDPD128Z,
+ IX86_BUILTIN_EXPANDPS256Z,
+ IX86_BUILTIN_EXPANDPS128Z,
+ IX86_BUILTIN_PEXPANDQ256Z,
+ IX86_BUILTIN_PEXPANDQ128Z,
+ IX86_BUILTIN_PEXPANDD256Z,
+ IX86_BUILTIN_PEXPANDD128Z,
+ IX86_BUILTIN_PMAXSD256_MASK,
+ IX86_BUILTIN_PMINSD256_MASK,
+ IX86_BUILTIN_PMAXUD256_MASK,
+ IX86_BUILTIN_PMINUD256_MASK,
+ IX86_BUILTIN_PMAXSD128_MASK,
+ IX86_BUILTIN_PMINSD128_MASK,
+ IX86_BUILTIN_PMAXUD128_MASK,
+ IX86_BUILTIN_PMINUD128_MASK,
+ IX86_BUILTIN_PMAXSQ256_MASK,
+ IX86_BUILTIN_PMINSQ256_MASK,
+ IX86_BUILTIN_PMAXUQ256_MASK,
+ IX86_BUILTIN_PMINUQ256_MASK,
+ IX86_BUILTIN_PMAXSQ128_MASK,
+ IX86_BUILTIN_PMINSQ128_MASK,
+ IX86_BUILTIN_PMAXUQ128_MASK,
+ IX86_BUILTIN_PMINUQ128_MASK,
+ IX86_BUILTIN_PMINSB256_MASK,
+ IX86_BUILTIN_PMINUB256_MASK,
+ IX86_BUILTIN_PMAXSB256_MASK,
+ IX86_BUILTIN_PMAXUB256_MASK,
+ IX86_BUILTIN_PMINSB128_MASK,
+ IX86_BUILTIN_PMINUB128_MASK,
+ IX86_BUILTIN_PMAXSB128_MASK,
+ IX86_BUILTIN_PMAXUB128_MASK,
+ IX86_BUILTIN_PMINSW256_MASK,
+ IX86_BUILTIN_PMINUW256_MASK,
+ IX86_BUILTIN_PMAXSW256_MASK,
+ IX86_BUILTIN_PMAXUW256_MASK,
+ IX86_BUILTIN_PMINSW128_MASK,
+ IX86_BUILTIN_PMINUW128_MASK,
+ IX86_BUILTIN_PMAXSW128_MASK,
+ IX86_BUILTIN_PMAXUW128_MASK,
+ IX86_BUILTIN_VPCONFLICTQ256,
+ IX86_BUILTIN_VPCONFLICTD256,
+ IX86_BUILTIN_VPCLZCNTQ256,
+ IX86_BUILTIN_VPCLZCNTD256,
+ IX86_BUILTIN_UNPCKHPD256_MASK,
+ IX86_BUILTIN_UNPCKHPD128_MASK,
+ IX86_BUILTIN_UNPCKHPS256_MASK,
+ IX86_BUILTIN_UNPCKHPS128_MASK,
+ IX86_BUILTIN_UNPCKLPD256_MASK,
+ IX86_BUILTIN_UNPCKLPD128_MASK,
+ IX86_BUILTIN_UNPCKLPS256_MASK,
+ IX86_BUILTIN_VPCONFLICTQ128,
+ IX86_BUILTIN_VPCONFLICTD128,
+ IX86_BUILTIN_VPCLZCNTQ128,
+ IX86_BUILTIN_VPCLZCNTD128,
+ IX86_BUILTIN_UNPCKLPS128_MASK,
+ IX86_BUILTIN_ALIGND256,
+ IX86_BUILTIN_ALIGNQ256,
+ IX86_BUILTIN_ALIGND128,
+ IX86_BUILTIN_ALIGNQ128,
+ IX86_BUILTIN_CVTPS2PH256_MASK,
+ IX86_BUILTIN_CVTPS2PH_MASK,
+ IX86_BUILTIN_CVTPH2PS_MASK,
+ IX86_BUILTIN_CVTPH2PS256_MASK,
+ IX86_BUILTIN_PUNPCKHDQ128_MASK,
+ IX86_BUILTIN_PUNPCKHDQ256_MASK,
+ IX86_BUILTIN_PUNPCKHQDQ128_MASK,
+ IX86_BUILTIN_PUNPCKHQDQ256_MASK,
+ IX86_BUILTIN_PUNPCKLDQ128_MASK,
+ IX86_BUILTIN_PUNPCKLDQ256_MASK,
+ IX86_BUILTIN_PUNPCKLQDQ128_MASK,
+ IX86_BUILTIN_PUNPCKLQDQ256_MASK,
+ IX86_BUILTIN_PUNPCKHBW128_MASK,
+ IX86_BUILTIN_PUNPCKHBW256_MASK,
+ IX86_BUILTIN_PUNPCKHWD128_MASK,
+ IX86_BUILTIN_PUNPCKHWD256_MASK,
+ IX86_BUILTIN_PUNPCKLBW128_MASK,
+ IX86_BUILTIN_PUNPCKLBW256_MASK,
+ IX86_BUILTIN_PUNPCKLWD128_MASK,
+ IX86_BUILTIN_PUNPCKLWD256_MASK,
+ IX86_BUILTIN_PSLLVV16HI,
+ IX86_BUILTIN_PSLLVV8HI,
+ IX86_BUILTIN_PACKSSDW256_MASK,
+ IX86_BUILTIN_PACKSSDW128_MASK,
+ IX86_BUILTIN_PACKUSDW256_MASK,
+ IX86_BUILTIN_PACKUSDW128_MASK,
+ IX86_BUILTIN_PAVGB256_MASK,
+ IX86_BUILTIN_PAVGW256_MASK,
+ IX86_BUILTIN_PAVGB128_MASK,
+ IX86_BUILTIN_PAVGW128_MASK,
+ IX86_BUILTIN_VPERMVARSF256_MASK,
+ IX86_BUILTIN_VPERMVARDF256_MASK,
+ IX86_BUILTIN_VPERMDF256_MASK,
+ IX86_BUILTIN_PABSB256_MASK,
+ IX86_BUILTIN_PABSB128_MASK,
+ IX86_BUILTIN_PABSW256_MASK,
+ IX86_BUILTIN_PABSW128_MASK,
+ IX86_BUILTIN_VPERMILVARPD_MASK,
+ IX86_BUILTIN_VPERMILVARPS_MASK,
+ IX86_BUILTIN_VPERMILVARPD256_MASK,
+ IX86_BUILTIN_VPERMILVARPS256_MASK,
+ IX86_BUILTIN_VPERMILPD_MASK,
+ IX86_BUILTIN_VPERMILPS_MASK,
+ IX86_BUILTIN_VPERMILPD256_MASK,
+ IX86_BUILTIN_VPERMILPS256_MASK,
+ IX86_BUILTIN_BLENDMQ256,
+ IX86_BUILTIN_BLENDMD256,
+ IX86_BUILTIN_BLENDMPD256,
+ IX86_BUILTIN_BLENDMPS256,
+ IX86_BUILTIN_BLENDMQ128,
+ IX86_BUILTIN_BLENDMD128,
+ IX86_BUILTIN_BLENDMPD128,
+ IX86_BUILTIN_BLENDMPS128,
+ IX86_BUILTIN_BLENDMW256,
+ IX86_BUILTIN_BLENDMB256,
+ IX86_BUILTIN_BLENDMW128,
+ IX86_BUILTIN_BLENDMB128,
+ IX86_BUILTIN_PMULLD256_MASK,
+ IX86_BUILTIN_PMULLD128_MASK,
+ IX86_BUILTIN_PMULUDQ256_MASK,
+ IX86_BUILTIN_PMULDQ256_MASK,
+ IX86_BUILTIN_PMULDQ128_MASK,
+ IX86_BUILTIN_PMULUDQ128_MASK,
+ IX86_BUILTIN_CVTPD2PS256_MASK,
+ IX86_BUILTIN_CVTPD2PS_MASK,
+ IX86_BUILTIN_VPERMVARSI256_MASK,
+ IX86_BUILTIN_VPERMVARDI256_MASK,
+ IX86_BUILTIN_VPERMDI256_MASK,
+ IX86_BUILTIN_CMPQ256,
+ IX86_BUILTIN_CMPD256,
+ IX86_BUILTIN_UCMPQ256,
+ IX86_BUILTIN_UCMPD256,
+ IX86_BUILTIN_CMPB256,
+ IX86_BUILTIN_CMPW256,
+ IX86_BUILTIN_UCMPB256,
+ IX86_BUILTIN_UCMPW256,
+ IX86_BUILTIN_CMPPD256_MASK,
+ IX86_BUILTIN_CMPPS256_MASK,
+ IX86_BUILTIN_CMPQ128,
+ IX86_BUILTIN_CMPD128,
+ IX86_BUILTIN_UCMPQ128,
+ IX86_BUILTIN_UCMPD128,
+ IX86_BUILTIN_CMPB128,
+ IX86_BUILTIN_CMPW128,
+ IX86_BUILTIN_UCMPB128,
+ IX86_BUILTIN_UCMPW128,
+ IX86_BUILTIN_CMPPD128_MASK,
+ IX86_BUILTIN_CMPPS128_MASK,
+
+ IX86_BUILTIN_GATHER3SIV8SF,
+ IX86_BUILTIN_GATHER3SIV4SF,
+ IX86_BUILTIN_GATHER3SIV4DF,
+ IX86_BUILTIN_GATHER3SIV2DF,
+ IX86_BUILTIN_GATHER3DIV8SF,
+ IX86_BUILTIN_GATHER3DIV4SF,
+ IX86_BUILTIN_GATHER3DIV4DF,
+ IX86_BUILTIN_GATHER3DIV2DF,
+ IX86_BUILTIN_GATHER3SIV8SI,
+ IX86_BUILTIN_GATHER3SIV4SI,
+ IX86_BUILTIN_GATHER3SIV4DI,
+ IX86_BUILTIN_GATHER3SIV2DI,
+ IX86_BUILTIN_GATHER3DIV8SI,
+ IX86_BUILTIN_GATHER3DIV4SI,
+ IX86_BUILTIN_GATHER3DIV4DI,
+ IX86_BUILTIN_GATHER3DIV2DI,
+ IX86_BUILTIN_SCATTERSIV8SF,
+ IX86_BUILTIN_SCATTERSIV4SF,
+ IX86_BUILTIN_SCATTERSIV4DF,
+ IX86_BUILTIN_SCATTERSIV2DF,
+ IX86_BUILTIN_SCATTERDIV8SF,
+ IX86_BUILTIN_SCATTERDIV4SF,
+ IX86_BUILTIN_SCATTERDIV4DF,
+ IX86_BUILTIN_SCATTERDIV2DF,
+ IX86_BUILTIN_SCATTERSIV8SI,
+ IX86_BUILTIN_SCATTERSIV4SI,
+ IX86_BUILTIN_SCATTERSIV4DI,
+ IX86_BUILTIN_SCATTERSIV2DI,
+ IX86_BUILTIN_SCATTERDIV8SI,
+ IX86_BUILTIN_SCATTERDIV4SI,
+ IX86_BUILTIN_SCATTERDIV4DI,
+ IX86_BUILTIN_SCATTERDIV2DI,
+
+ /* AVX512DQ. */
+ IX86_BUILTIN_RANGESD128,
+ IX86_BUILTIN_RANGESS128,
+ IX86_BUILTIN_KUNPCKWD,
+ IX86_BUILTIN_KUNPCKDQ,
+ IX86_BUILTIN_BROADCASTF32x2_512,
+ IX86_BUILTIN_BROADCASTI32x2_512,
+ IX86_BUILTIN_BROADCASTF64X2_512,
+ IX86_BUILTIN_BROADCASTI64X2_512,
+ IX86_BUILTIN_BROADCASTF32X8_512,
+ IX86_BUILTIN_BROADCASTI32X8_512,
+ IX86_BUILTIN_EXTRACTF64X2_512,
+ IX86_BUILTIN_EXTRACTF32X8,
+ IX86_BUILTIN_EXTRACTI64X2_512,
+ IX86_BUILTIN_EXTRACTI32X8,
+ IX86_BUILTIN_REDUCEPD512_MASK,
+ IX86_BUILTIN_REDUCEPS512_MASK,
+ IX86_BUILTIN_PMULLQ512,
+ IX86_BUILTIN_XORPD512,
+ IX86_BUILTIN_XORPS512,
+ IX86_BUILTIN_ORPD512,
+ IX86_BUILTIN_ORPS512,
+ IX86_BUILTIN_ANDPD512,
+ IX86_BUILTIN_ANDPS512,
+ IX86_BUILTIN_ANDNPD512,
+ IX86_BUILTIN_ANDNPS512,
+ IX86_BUILTIN_INSERTF32X8,
+ IX86_BUILTIN_INSERTI32X8,
+ IX86_BUILTIN_INSERTF64X2_512,
+ IX86_BUILTIN_INSERTI64X2_512,
+ IX86_BUILTIN_FPCLASSPD512,
+ IX86_BUILTIN_FPCLASSPS512,
+ IX86_BUILTIN_CVTD2MASK512,
+ IX86_BUILTIN_CVTQ2MASK512,
+ IX86_BUILTIN_CVTMASK2D512,
+ IX86_BUILTIN_CVTMASK2Q512,
+ IX86_BUILTIN_CVTPD2QQ512,
+ IX86_BUILTIN_CVTPS2QQ512,
+ IX86_BUILTIN_CVTPD2UQQ512,
+ IX86_BUILTIN_CVTPS2UQQ512,
+ IX86_BUILTIN_CVTQQ2PS512,
+ IX86_BUILTIN_CVTUQQ2PS512,
+ IX86_BUILTIN_CVTQQ2PD512,
+ IX86_BUILTIN_CVTUQQ2PD512,
+ IX86_BUILTIN_CVTTPS2QQ512,
+ IX86_BUILTIN_CVTTPS2UQQ512,
+ IX86_BUILTIN_CVTTPD2QQ512,
+ IX86_BUILTIN_CVTTPD2UQQ512,
+ IX86_BUILTIN_RANGEPS512,
+ IX86_BUILTIN_RANGEPD512,
+
+ /* AVX512BW. */
+ IX86_BUILTIN_PACKUSDW512,
+ IX86_BUILTIN_PACKSSDW512,
+ IX86_BUILTIN_LOADDQUHI512_MASK,
+ IX86_BUILTIN_LOADDQUQI512_MASK,
+ IX86_BUILTIN_PSLLDQ512,
+ IX86_BUILTIN_PSRLDQ512,
+ IX86_BUILTIN_STOREDQUHI512_MASK,
+ IX86_BUILTIN_STOREDQUQI512_MASK,
+ IX86_BUILTIN_PALIGNR512,
+ IX86_BUILTIN_PALIGNR512_MASK,
+ IX86_BUILTIN_MOVDQUHI512_MASK,
+ IX86_BUILTIN_MOVDQUQI512_MASK,
+ IX86_BUILTIN_PSADBW512,
+ IX86_BUILTIN_DBPSADBW512,
+ IX86_BUILTIN_PBROADCASTB512,
+ IX86_BUILTIN_PBROADCASTB512_GPR,
+ IX86_BUILTIN_PBROADCASTW512,
+ IX86_BUILTIN_PBROADCASTW512_GPR,
+ IX86_BUILTIN_PMOVSXBW512_MASK,
+ IX86_BUILTIN_PMOVZXBW512_MASK,
+ IX86_BUILTIN_VPERMVARHI512_MASK,
+ IX86_BUILTIN_VPERMT2VARHI512,
+ IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
+ IX86_BUILTIN_VPERMI2VARHI512,
+ IX86_BUILTIN_PAVGB512,
+ IX86_BUILTIN_PAVGW512,
+ IX86_BUILTIN_PADDB512,
+ IX86_BUILTIN_PSUBB512,
+ IX86_BUILTIN_PSUBSB512,
+ IX86_BUILTIN_PADDSB512,
+ IX86_BUILTIN_PSUBUSB512,
+ IX86_BUILTIN_PADDUSB512,
+ IX86_BUILTIN_PSUBW512,
+ IX86_BUILTIN_PADDW512,
+ IX86_BUILTIN_PSUBSW512,
+ IX86_BUILTIN_PADDSW512,
+ IX86_BUILTIN_PSUBUSW512,
+ IX86_BUILTIN_PADDUSW512,
+ IX86_BUILTIN_PMAXUW512,
+ IX86_BUILTIN_PMAXSW512,
+ IX86_BUILTIN_PMINUW512,
+ IX86_BUILTIN_PMINSW512,
+ IX86_BUILTIN_PMAXUB512,
+ IX86_BUILTIN_PMAXSB512,
+ IX86_BUILTIN_PMINUB512,
+ IX86_BUILTIN_PMINSB512,
+ IX86_BUILTIN_PMOVWB512,
+ IX86_BUILTIN_PMOVSWB512,
+ IX86_BUILTIN_PMOVUSWB512,
+ IX86_BUILTIN_PMULHRSW512_MASK,
+ IX86_BUILTIN_PMULHUW512_MASK,
+ IX86_BUILTIN_PMULHW512_MASK,
+ IX86_BUILTIN_PMULLW512_MASK,
+ IX86_BUILTIN_PSLLWI512_MASK,
+ IX86_BUILTIN_PSLLW512_MASK,
+ IX86_BUILTIN_PACKSSWB512,
+ IX86_BUILTIN_PACKUSWB512,
+ IX86_BUILTIN_PSRAVV32HI,
+ IX86_BUILTIN_PMADDUBSW512_MASK,
+ IX86_BUILTIN_PMADDWD512_MASK,
+ IX86_BUILTIN_PSRLVV32HI,
+ IX86_BUILTIN_PUNPCKHBW512,
+ IX86_BUILTIN_PUNPCKHWD512,
+ IX86_BUILTIN_PUNPCKLBW512,
+ IX86_BUILTIN_PUNPCKLWD512,
+ IX86_BUILTIN_PSHUFB512,
+ IX86_BUILTIN_PSHUFHW512,
+ IX86_BUILTIN_PSHUFLW512,
+ IX86_BUILTIN_PSRAWI512,
+ IX86_BUILTIN_PSRAW512,
+ IX86_BUILTIN_PSRLWI512,
+ IX86_BUILTIN_PSRLW512,
+ IX86_BUILTIN_CVTB2MASK512,
+ IX86_BUILTIN_CVTW2MASK512,
+ IX86_BUILTIN_CVTMASK2B512,
+ IX86_BUILTIN_CVTMASK2W512,
+ IX86_BUILTIN_PCMPEQB512_MASK,
+ IX86_BUILTIN_PCMPEQW512_MASK,
+ IX86_BUILTIN_PCMPGTB512_MASK,
+ IX86_BUILTIN_PCMPGTW512_MASK,
+ IX86_BUILTIN_PTESTMB512,
+ IX86_BUILTIN_PTESTMW512,
+ IX86_BUILTIN_PTESTNMB512,
+ IX86_BUILTIN_PTESTNMW512,
+ IX86_BUILTIN_PSLLVV32HI,
+ IX86_BUILTIN_PABSB512,
+ IX86_BUILTIN_PABSW512,
+ IX86_BUILTIN_BLENDMW512,
+ IX86_BUILTIN_BLENDMB512,
+ IX86_BUILTIN_CMPB512,
+ IX86_BUILTIN_CMPW512,
+ IX86_BUILTIN_UCMPB512,
+ IX86_BUILTIN_UCMPW512,
+
/* Alternate 4 and 8 element gather/scatter for the vectorizer
where all operands are 32-byte or 64-byte wide respectively. */
IX86_BUILTIN_GATHERALTSIV4DF,
@@ -28490,6 +29632,10 @@ enum ix86_builtins
IX86_BUILTIN_GATHERALTDIV8SI,
IX86_BUILTIN_GATHER3ALTDIV16SF,
IX86_BUILTIN_GATHER3ALTDIV16SI,
+ IX86_BUILTIN_GATHER3ALTSIV4DF,
+ IX86_BUILTIN_GATHER3ALTDIV8SF,
+ IX86_BUILTIN_GATHER3ALTSIV4DI,
+ IX86_BUILTIN_GATHER3ALTDIV8SI,
IX86_BUILTIN_GATHER3ALTSIV8DF,
IX86_BUILTIN_GATHER3ALTSIV8DI,
IX86_BUILTIN_GATHER3DIV16SF,
@@ -29127,7 +30273,7 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512dq_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
@@ -29173,6 +30319,108 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
{ OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
+
+ /* AVX512BW */
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
+
+ /* AVX512VL */
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_store_mask, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_store_mask, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_store_mask, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_store_mask, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_store_mask, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_store_mask, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_store_mask, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_store_mask, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_store_mask, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_store_mask, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_store_mask, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_store_mask, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_store_mask, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_store_mask, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_store_mask, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_store_mask, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_store_mask, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_store_mask, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_store_mask, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_store_mask, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_store_mask, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_store_mask, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_store_mask, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_store_mask, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_store_mask, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_store_mask, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_store_mask, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_store_mask, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_store_mask, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_store_mask, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
};
/* Builtins with variable number of arguments. */
@@ -29966,8 +31214,8 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
@@ -30023,7 +31271,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
@@ -30226,6 +31474,852 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
+
+ /* AVX512VL. */
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df, "__builtin_ia32_fixupimmpd256", IX86_BUILTIN_FIXUPIMMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf, "__builtin_ia32_fixupimmps256", IX86_BUILTIN_FIXUPIMMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df, "__builtin_ia32_fixupimmpd128", IX86_BUILTIN_FIXUPIMMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf, "__builtin_ia32_fixupimmps128", IX86_BUILTIN_FIXUPIMMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask_1, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask_1, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
+  /* vpmovm2b is an AVX-512BW instruction (like cvtmask2b256/cvtmask2w* below
+     and the inverse cvtb2mask* above), not AVX-512DQ — DQ only covers the
+     dword/qword mask conversions (cvtd2mask*/cvtq2mask*/cvtmask2d*/cvtmask2q*).  */
+  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
+
+ /* AVX512DQ. */
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
+
+ /* AVX512BW. */
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512vl_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512vl_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512vl_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
};
/* Builtins with rounding support. */
@@ -30245,7 +32339,7 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
@@ -30281,8 +32375,8 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
@@ -30363,6 +32457,24 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+
+ /* AVX512DQ. */
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
};
/* FMA4 and XOP. */
@@ -31070,6 +33182,151 @@ ix86_init_mmx_sse_builtins (void)
VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
IX86_BUILTIN_SCATTERDIV8DI);
+ /* AVX512VL */
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
+ V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
+ V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
+ V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
+ V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
+ V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
+ V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
+ V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV2DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
+ V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
+ V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV2DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
+ V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
+ V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
+ V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
+ V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
+ V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTSIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
+ V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTDIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
+ V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTSIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
+ V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTDIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
+ VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
+ IX86_BUILTIN_SCATTERSIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
+ VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
+ IX86_BUILTIN_SCATTERSIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
+ VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
+ IX86_BUILTIN_SCATTERSIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
+ VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
+ IX86_BUILTIN_SCATTERSIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
+ VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
+ IX86_BUILTIN_SCATTERDIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
+ VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
+ IX86_BUILTIN_SCATTERDIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
+ VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
+ IX86_BUILTIN_SCATTERDIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
+ VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
+ IX86_BUILTIN_SCATTERDIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
+ VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
+ IX86_BUILTIN_SCATTERSIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
+ VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
+ IX86_BUILTIN_SCATTERSIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
+ VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
+ IX86_BUILTIN_SCATTERSIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
+ VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
+ IX86_BUILTIN_SCATTERSIV2DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
+ VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
+ IX86_BUILTIN_SCATTERDIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
+ VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
+ IX86_BUILTIN_SCATTERDIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
+ VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
+ IX86_BUILTIN_SCATTERDIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
+ VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
+ IX86_BUILTIN_SCATTERDIV2DI);
+
/* AVX512PF */
def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
@@ -33632,6 +35889,28 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4DI_FTYPE_V4SI:
case V4DI_FTYPE_V2DI:
case HI_FTYPE_HI:
+ case HI_FTYPE_V16QI:
+ case SI_FTYPE_V32QI:
+ case DI_FTYPE_V64QI:
+ case V16QI_FTYPE_HI:
+ case V32QI_FTYPE_SI:
+ case V64QI_FTYPE_DI:
+ case V8HI_FTYPE_QI:
+ case V16HI_FTYPE_HI:
+ case V32HI_FTYPE_SI:
+ case V4SI_FTYPE_QI:
+ case V8SI_FTYPE_QI:
+ case V4SI_FTYPE_HI:
+ case V8SI_FTYPE_HI:
+ case QI_FTYPE_V8HI:
+ case HI_FTYPE_V16HI:
+ case SI_FTYPE_V32HI:
+ case QI_FTYPE_V4SI:
+ case QI_FTYPE_V8SI:
+ case HI_FTYPE_V16SI:
+ case QI_FTYPE_V2DI:
+ case QI_FTYPE_V4DI:
+ case QI_FTYPE_V8DI:
case UINT_FTYPE_V2DF:
case UINT_FTYPE_V4SF:
case UINT64_FTYPE_V2DF:
@@ -33639,6 +35918,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16QI_FTYPE_V8DI:
case V16HI_FTYPE_V16SI:
case V16SI_FTYPE_HI:
+ case V2DI_FTYPE_QI:
+ case V4DI_FTYPE_QI:
case V16SI_FTYPE_V16SI:
case V16SI_FTYPE_INT:
case V16SF_FTYPE_FLOAT:
@@ -33647,7 +35928,6 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8HI_FTYPE_V8DI:
case V8UHI_FTYPE_V8UHI:
case V8SI_FTYPE_V8DI:
- case V8USI_FTYPE_V8USI:
case V8SF_FTYPE_V8DF:
case V8DI_FTYPE_QI:
case V8DI_FTYPE_INT64:
@@ -33727,6 +36007,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4DI_FTYPE_V8SI_V8SI:
case V4UDI_FTYPE_V8USI_V8USI:
case QI_FTYPE_V8DI_V8DI:
+ case V8DI_FTYPE_V64QI_V64QI:
case HI_FTYPE_V16SI_V16SI:
if (comparison == UNKNOWN)
return ix86_expand_binop_builtin (icode, exp, target);
@@ -33766,6 +36047,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case UINT16_FTYPE_UINT16_INT:
case UINT8_FTYPE_UINT8_INT:
case HI_FTYPE_HI_HI:
+ case SI_FTYPE_SI_SI:
+ case DI_FTYPE_DI_DI:
case V16SI_FTYPE_V8DF_V8DF:
nargs = 2;
break;
@@ -33779,6 +36062,11 @@ ix86_expand_args_builtin (const struct builtin_description *d,
rmode = V2TImode;
nargs_constant = 1;
break;
+ case V8DI_FTYPE_V8DI_INT_CONVERT:
+ nargs = 2;
+ rmode = V4TImode;
+ nargs_constant = 1;
+ break;
case V8HI_FTYPE_V8HI_INT:
case V8HI_FTYPE_V8SF_INT:
case V16HI_FTYPE_V16SF_INT:
@@ -33804,6 +36092,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DI_FTYPE_V4DI_INT:
case V4DI_FTYPE_V8DI_INT:
case HI_FTYPE_HI_INT:
+ case QI_FTYPE_V4SF_INT:
+ case QI_FTYPE_V2DF_INT:
nargs = 2;
nargs_constant = 1;
break;
@@ -33828,20 +36118,118 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16SI_FTYPE_V16HI_V16SI_HI:
case V16SI_FTYPE_V16QI_V16SI_HI:
case V16SI_FTYPE_V16SF_V16SI_HI:
+ case V8SF_FTYPE_V4SF_V8SF_QI:
+ case V4DF_FTYPE_V2DF_V4DF_QI:
+ case V8SI_FTYPE_V4SI_V8SI_QI:
+ case V8SI_FTYPE_SI_V8SI_QI:
+ case V4SI_FTYPE_V4SI_V4SI_QI:
+ case V4SI_FTYPE_SI_V4SI_QI:
+ case V4DI_FTYPE_V2DI_V4DI_QI:
+ case V4DI_FTYPE_DI_V4DI_QI:
+ case V2DI_FTYPE_V2DI_V2DI_QI:
+ case V2DI_FTYPE_DI_V2DI_QI:
+ case V64QI_FTYPE_V64QI_V64QI_DI:
+ case V64QI_FTYPE_V16QI_V64QI_DI:
+ case V64QI_FTYPE_QI_V64QI_DI:
+ case V32QI_FTYPE_V32QI_V32QI_SI:
+ case V32QI_FTYPE_V16QI_V32QI_SI:
+ case V32QI_FTYPE_QI_V32QI_SI:
+ case V16QI_FTYPE_V16QI_V16QI_HI:
+ case V16QI_FTYPE_QI_V16QI_HI:
+ case V32HI_FTYPE_V8HI_V32HI_SI:
+ case V32HI_FTYPE_HI_V32HI_SI:
+ case V16HI_FTYPE_V8HI_V16HI_HI:
+ case V16HI_FTYPE_HI_V16HI_HI:
+ case V8HI_FTYPE_V8HI_V8HI_QI:
+ case V8HI_FTYPE_HI_V8HI_QI:
+ case V8SF_FTYPE_V8HI_V8SF_QI:
+ case V4SF_FTYPE_V8HI_V4SF_QI:
+ case V8SI_FTYPE_V8SF_V8SI_QI:
+ case V4SI_FTYPE_V4SF_V4SI_QI:
+ case V8DI_FTYPE_V8SF_V8DI_QI:
+ case V4DI_FTYPE_V4SF_V4DI_QI:
+ case V2DI_FTYPE_V4SF_V2DI_QI:
+ case V8SF_FTYPE_V8DI_V8SF_QI:
+ case V4SF_FTYPE_V4DI_V4SF_QI:
+ case V4SF_FTYPE_V2DI_V4SF_QI:
+ case V8DF_FTYPE_V8DI_V8DF_QI:
+ case V4DF_FTYPE_V4DI_V4DF_QI:
+ case V2DF_FTYPE_V2DI_V2DF_QI:
+ case V16QI_FTYPE_V8HI_V16QI_QI:
+ case V16QI_FTYPE_V16HI_V16QI_HI:
+ case V16QI_FTYPE_V4SI_V16QI_QI:
+ case V16QI_FTYPE_V8SI_V16QI_QI:
+ case V8HI_FTYPE_V4SI_V8HI_QI:
+ case V8HI_FTYPE_V8SI_V8HI_QI:
+ case V16QI_FTYPE_V2DI_V16QI_QI:
+ case V16QI_FTYPE_V4DI_V16QI_QI:
+ case V8HI_FTYPE_V2DI_V8HI_QI:
+ case V8HI_FTYPE_V4DI_V8HI_QI:
+ case V4SI_FTYPE_V2DI_V4SI_QI:
+ case V4SI_FTYPE_V4DI_V4SI_QI:
+ case V32QI_FTYPE_V32HI_V32QI_SI:
+ case HI_FTYPE_V16QI_V16QI_HI:
+ case SI_FTYPE_V32QI_V32QI_SI:
+ case DI_FTYPE_V64QI_V64QI_DI:
+ case QI_FTYPE_V8HI_V8HI_QI:
+ case HI_FTYPE_V16HI_V16HI_HI:
+ case SI_FTYPE_V32HI_V32HI_SI:
+ case QI_FTYPE_V4SI_V4SI_QI:
+ case QI_FTYPE_V8SI_V8SI_QI:
+ case QI_FTYPE_V2DI_V2DI_QI:
+ case QI_FTYPE_V4DI_V4DI_QI:
+ case V4SF_FTYPE_V2DF_V4SF_QI:
+ case V4SF_FTYPE_V4DF_V4SF_QI:
+      /* Fall through: nargs = 3 is set at the end of this case list.  */
case V16SI_FTYPE_V16SI_V16SI_HI:
case V16SI_FTYPE_V16SI_V16SI_V16SI:
case V16SI_FTYPE_V4SI_V16SI_HI:
case V2DI_FTYPE_V2DI_V2DI_V2DI:
+ case V2DI_FTYPE_V4SI_V2DI_QI:
+ case V2DI_FTYPE_V8HI_V2DI_QI:
+ case V2DI_FTYPE_V16QI_V2DI_QI:
+ case V4DI_FTYPE_V4DI_V4DI_QI:
+ case V4DI_FTYPE_V4SI_V4DI_QI:
+ case V4DI_FTYPE_V8HI_V4DI_QI:
+ case V4DI_FTYPE_V16QI_V4DI_QI:
+ case V8DI_FTYPE_V8DF_V8DI_QI:
+ case V4DI_FTYPE_V4DF_V4DI_QI:
+ case V2DI_FTYPE_V2DF_V2DI_QI:
+ case V4SI_FTYPE_V4DF_V4SI_QI:
+ case V4SI_FTYPE_V2DF_V4SI_QI:
+ case V4SI_FTYPE_V8HI_V4SI_QI:
+ case V4SI_FTYPE_V16QI_V4SI_QI:
+ case V8SI_FTYPE_V8SI_V8SI_V8SI:
case V4DI_FTYPE_V4DI_V4DI_V4DI:
case V8DF_FTYPE_V2DF_V8DF_QI:
case V8DF_FTYPE_V4DF_V8DF_QI:
case V8DF_FTYPE_V8DF_V8DF_QI:
case V8DF_FTYPE_V8DF_V8DF_V8DF:
+ case V8SF_FTYPE_V8SF_V8SF_QI:
+ case V8SF_FTYPE_V8SI_V8SF_QI:
+ case V4DF_FTYPE_V4DF_V4DF_QI:
+ case V4SF_FTYPE_V4SF_V4SF_QI:
+ case V2DF_FTYPE_V2DF_V2DF_QI:
+ case V2DF_FTYPE_V4SF_V2DF_QI:
+ case V2DF_FTYPE_V4SI_V2DF_QI:
+ case V4SF_FTYPE_V4SI_V4SF_QI:
+ case V4DF_FTYPE_V4SF_V4DF_QI:
+ case V4DF_FTYPE_V4SI_V4DF_QI:
+ case V8SI_FTYPE_V8SI_V8SI_QI:
+ case V8SI_FTYPE_V8HI_V8SI_QI:
+ case V8SI_FTYPE_V16QI_V8SI_QI:
case V8DF_FTYPE_V8DF_V8DI_V8DF:
case V8DF_FTYPE_V8DI_V8DF_V8DF:
case V8DF_FTYPE_V8SF_V8DF_QI:
case V8DF_FTYPE_V8SI_V8DF_QI:
case V8DI_FTYPE_DI_V8DI_QI:
+ case V16SF_FTYPE_V8SF_V16SF_HI:
+ case V16SI_FTYPE_V8SI_V16SI_HI:
+ case V16HI_FTYPE_V16HI_V16HI_HI:
+ case V8HI_FTYPE_V16QI_V8HI_QI:
+ case V16HI_FTYPE_V16QI_V16HI_HI:
+ case V32HI_FTYPE_V32HI_V32HI_SI:
+ case V32HI_FTYPE_V32QI_V32HI_SI:
case V8DI_FTYPE_V16QI_V8DI_QI:
case V8DI_FTYPE_V2DI_V8DI_QI:
case V8DI_FTYPE_V4DI_V8DI_QI:
@@ -33903,13 +36291,80 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 3;
nargs_constant = 2;
break;
+ case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
+ nargs = 3;
+ rmode = V8DImode;
+ nargs_constant = 1;
+ break;
+ case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
+ nargs = 5;
+ rmode = V8DImode;
+ mask_pos = 2;
+ nargs_constant = 1;
+ break;
+ case QI_FTYPE_V8DF_INT_QI:
+ case QI_FTYPE_V4DF_INT_QI:
+ case QI_FTYPE_V2DF_INT_QI:
+ case HI_FTYPE_V16SF_INT_HI:
+ case QI_FTYPE_V8SF_INT_QI:
+ case QI_FTYPE_V4SF_INT_QI:
+ nargs = 3;
+ mask_pos = 1;
+ nargs_constant = 1;
+ break;
+ case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
+ nargs = 5;
+ rmode = V4DImode;
+ mask_pos = 2;
+ nargs_constant = 1;
+ break;
+ case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
+ nargs = 5;
+ rmode = V2DImode;
+ mask_pos = 2;
+ nargs_constant = 1;
+ break;
+ case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
+ case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
+ case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
+ case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
+ case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
+ case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
+ case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
+ case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
+ case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
+ case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
+ case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
+ case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
+ case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
+ case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
+ case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
+ case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
+ case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
+ case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
+ case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
+ case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
+ case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
+ case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
+ case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
+ case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
+ case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
+ case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
+ case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
+ case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
+ case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
+ case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
+ case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
+ case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
+ case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
+ case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
@@ -33919,6 +36374,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
+ case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
+ case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
+ case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
+ case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
nargs = 4;
break;
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
@@ -33929,8 +36388,20 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 4;
nargs_constant = 1;
break;
+ case QI_FTYPE_V4DI_V4DI_INT_QI:
+ case QI_FTYPE_V8SI_V8SI_INT_QI:
+ case QI_FTYPE_V4DF_V4DF_INT_QI:
+ case QI_FTYPE_V8SF_V8SF_INT_QI:
+ case QI_FTYPE_V2DI_V2DI_INT_QI:
+ case QI_FTYPE_V4SI_V4SI_INT_QI:
case QI_FTYPE_V2DF_V2DF_INT_QI:
case QI_FTYPE_V4SF_V4SF_INT_QI:
+ case DI_FTYPE_V64QI_V64QI_INT_DI:
+ case SI_FTYPE_V32QI_V32QI_INT_SI:
+ case HI_FTYPE_V16QI_V16QI_INT_HI:
+ case SI_FTYPE_V32HI_V32HI_INT_SI:
+ case HI_FTYPE_V16HI_V16HI_INT_HI:
+ case QI_FTYPE_V8HI_V8HI_INT_QI:
nargs = 4;
mask_pos = 1;
nargs_constant = 1;
@@ -33951,6 +36422,27 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 4;
nargs_constant = 1;
break;
+ case V8SF_FTYPE_V8SF_INT_V8SF_QI:
+ case V4SF_FTYPE_V4SF_INT_V4SF_QI:
+ case V2DF_FTYPE_V4DF_INT_V2DF_QI:
+ case V2DI_FTYPE_V4DI_INT_V2DI_QI:
+ case V8SF_FTYPE_V16SF_INT_V8SF_QI:
+ case V8SI_FTYPE_V16SI_INT_V8SI_QI:
+ case V2DF_FTYPE_V8DF_INT_V2DF_QI:
+ case V2DI_FTYPE_V8DI_INT_V2DI_QI:
+ case V4SF_FTYPE_V8SF_INT_V4SF_QI:
+ case V4SI_FTYPE_V8SI_INT_V4SI_QI:
+ case V8HI_FTYPE_V8SF_INT_V8HI_QI:
+ case V8HI_FTYPE_V4SF_INT_V8HI_QI:
+ case V32HI_FTYPE_V32HI_INT_V32HI_SI:
+ case V16HI_FTYPE_V16HI_INT_V16HI_HI:
+ case V8HI_FTYPE_V8HI_INT_V8HI_QI:
+ case V4DI_FTYPE_V4DI_INT_V4DI_QI:
+ case V2DI_FTYPE_V2DI_INT_V2DI_QI:
+ case V8SI_FTYPE_V8SI_INT_V8SI_QI:
+ case V4SI_FTYPE_V4SI_INT_V4SI_QI:
+ case V4DF_FTYPE_V4DF_INT_V4DF_QI:
+ case V2DF_FTYPE_V2DF_INT_V2DF_QI:
case V8DF_FTYPE_V8DF_INT_V8DF_QI:
case V16SF_FTYPE_V16SF_INT_V16SF_HI:
case V16HI_FTYPE_V16SF_INT_V16HI_HI:
@@ -33974,6 +36466,23 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
+ case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
+ case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
+ case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
+ case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
+ case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
+ case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
+ case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
+ case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
+ case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
+ case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
+ case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
+ case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
+ case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
+ case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
+ case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
+ case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
+ case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
nargs = 5;
mask_pos = 2;
nargs_constant = 1;
@@ -33983,6 +36492,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
+ case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
+ case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
+ case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
+ case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
+
nargs = 5;
mask_pos = 1;
nargs_constant = 1;
@@ -34040,8 +36556,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
if (!match)
switch (icode)
{
- case CODE_FOR_avx2_inserti128:
- case CODE_FOR_avx2_extracti128:
+ case CODE_FOR_avx_vinsertf128v4di:
+ case CODE_FOR_avx_vextractf128v4di:
error ("the last argument must be an 1-bit immediate");
return const0_rtx;
@@ -34049,6 +36565,14 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx512f_cmpv16si3_mask:
case CODE_FOR_avx512f_ucmpv8di3_mask:
case CODE_FOR_avx512f_ucmpv16si3_mask:
+ case CODE_FOR_avx512vl_cmpv4di3_mask:
+ case CODE_FOR_avx512vl_cmpv8si3_mask:
+ case CODE_FOR_avx512vl_ucmpv4di3_mask:
+ case CODE_FOR_avx512vl_ucmpv8si3_mask:
+ case CODE_FOR_avx512vl_cmpv2di3_mask:
+ case CODE_FOR_avx512vl_cmpv4si3_mask:
+ case CODE_FOR_avx512vl_ucmpv2di3_mask:
+ case CODE_FOR_avx512vl_ucmpv4si3_mask:
error ("the last argument must be a 3-bit immediate");
return const0_rtx;
@@ -34068,14 +36592,27 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_sse4_1_blendps:
case CODE_FOR_avx_blendpd256:
case CODE_FOR_avx_vpermilv4df:
+ case CODE_FOR_avx_vpermilv4df_mask:
case CODE_FOR_avx512f_getmantv8df_mask:
case CODE_FOR_avx512f_getmantv16sf_mask:
+ case CODE_FOR_avx512vl_getmantv8sf_mask:
+ case CODE_FOR_avx512vl_getmantv4df_mask:
+ case CODE_FOR_avx512vl_getmantv4sf_mask:
+ case CODE_FOR_avx512vl_getmantv2df_mask:
+ case CODE_FOR_avx512dq_rangepv8df_mask_round:
+ case CODE_FOR_avx512dq_rangepv16sf_mask_round:
+ case CODE_FOR_avx512dq_rangepv4df_mask:
+ case CODE_FOR_avx512dq_rangepv8sf_mask:
+ case CODE_FOR_avx512dq_rangepv2df_mask:
+ case CODE_FOR_avx512dq_rangepv4sf_mask:
+ case CODE_FOR_avx_shufpd256_mask:
error ("the last argument must be a 4-bit immediate");
return const0_rtx;
case CODE_FOR_sha1rnds4:
case CODE_FOR_sse4_1_blendpd:
case CODE_FOR_avx_vpermilv2df:
+ case CODE_FOR_avx_vpermilv2df_mask:
case CODE_FOR_xop_vpermil2v2df3:
case CODE_FOR_xop_vpermil2v4sf3:
case CODE_FOR_xop_vpermil2v4df3:
@@ -34084,6 +36621,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx512f_vinserti32x4_mask:
case CODE_FOR_avx512f_vextractf32x4_mask:
case CODE_FOR_avx512f_vextracti32x4_mask:
+ case CODE_FOR_sse2_shufpd:
+ case CODE_FOR_sse2_shufpd_mask:
+ case CODE_FOR_avx512dq_shuf_f64x2_mask:
+ case CODE_FOR_avx512dq_shuf_i64x2_mask:
+ case CODE_FOR_avx512vl_shuf_i32x4_mask:
+ case CODE_FOR_avx512vl_shuf_f32x4_mask:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
@@ -34097,6 +36640,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx512f_vinserti64x4_mask:
case CODE_FOR_avx512f_vextractf64x4_mask:
case CODE_FOR_avx512f_vextracti64x4_mask:
+ case CODE_FOR_avx512dq_vinsertf32x8_mask:
+ case CODE_FOR_avx512dq_vinserti32x8_mask:
+ case CODE_FOR_avx512vl_vinsertv4df:
+ case CODE_FOR_avx512vl_vinsertv4di:
+ case CODE_FOR_avx512vl_vinsertv8sf:
+ case CODE_FOR_avx512vl_vinsertv8si:
error ("the last argument must be a 1-bit immediate");
return const0_rtx;
@@ -34401,7 +36950,11 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V8SF_FTYPE_V8DF_V8SF_QI_INT:
case V8DF_FTYPE_V8DF_V8DF_QI_INT:
case V8SI_FTYPE_V8DF_V8SI_QI_INT:
+ case V8DI_FTYPE_V8DF_V8DI_QI_INT:
+ case V8SF_FTYPE_V8DI_V8SF_QI_INT:
+ case V8DF_FTYPE_V8DI_V8DF_QI_INT:
case V16SF_FTYPE_V16SF_V16SF_HI_INT:
+ case V8DI_FTYPE_V8SF_V8DI_QI_INT:
case V16SF_FTYPE_V16SI_V16SF_HI_INT:
case V16SI_FTYPE_V16SF_V16SI_HI_INT:
case V8DF_FTYPE_V8SF_V8DF_QI_INT:
@@ -34438,6 +36991,8 @@ ix86_expand_round_builtin (const struct builtin_description *d,
nargs_constant = 3;
nargs = 5;
break;
+ case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
+ case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
nargs = 6;
@@ -34476,8 +37031,8 @@ ix86_expand_round_builtin (const struct builtin_description *d,
{
case CODE_FOR_avx512f_getmantv8df_mask_round:
case CODE_FOR_avx512f_getmantv16sf_mask_round:
- case CODE_FOR_avx512f_getmantv2df_round:
- case CODE_FOR_avx512f_getmantv4sf_round:
+ case CODE_FOR_avx512f_vgetmantv2df_round:
+ case CODE_FOR_avx512f_vgetmantv4sf_round:
error ("the immediate argument must be a 4-bit immediate");
return const0_rtx;
case CODE_FOR_avx512f_cmpv8df3_mask_round:
@@ -34629,7 +37184,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
{
case CODE_FOR_sse4_1_movntdqa:
case CODE_FOR_avx2_movntdqa:
- case CODE_FOR_avx512f_movntdqa:
+ case CODE_FOR_avx512dq_movntdqa:
aligned_mem = true;
break;
default:
@@ -34700,7 +37255,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PV8DF_V8DF_QI:
case VOID_FTYPE_PV16SF_V16SF_HI:
case VOID_FTYPE_PV8DI_V8DI_QI:
+ case VOID_FTYPE_PV4DI_V4DI_QI:
+ case VOID_FTYPE_PV2DI_V2DI_QI:
case VOID_FTYPE_PV16SI_V16SI_HI:
+ case VOID_FTYPE_PV8SI_V8SI_QI:
+ case VOID_FTYPE_PV4SI_V4SI_QI:
switch (icode)
{
/* These builtins and instructions require the memory
@@ -34709,6 +37268,14 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx512f_storev16si_mask:
case CODE_FOR_avx512f_storev8df_mask:
case CODE_FOR_avx512f_storev8di_mask:
+ case CODE_FOR_avx512vl_storev8sf_mask:
+ case CODE_FOR_avx512vl_storev8si_mask:
+ case CODE_FOR_avx512vl_storev4df_mask:
+ case CODE_FOR_avx512vl_storev4di_mask:
+ case CODE_FOR_avx512vl_storev4sf_mask:
+ case CODE_FOR_avx512vl_storev4si_mask:
+ case CODE_FOR_avx512vl_storev2df_mask:
+ case CODE_FOR_avx512vl_storev2di_mask:
aligned_mem = true;
break;
default:
@@ -34730,17 +37297,51 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PV16HI_V16SI_HI:
case VOID_FTYPE_PV16QI_V8DI_QI:
case VOID_FTYPE_PV16QI_V16SI_HI:
+ case VOID_FTYPE_PV4SI_V4DI_QI:
+ case VOID_FTYPE_PV4SI_V2DI_QI:
+ case VOID_FTYPE_PV8HI_V4DI_QI:
+ case VOID_FTYPE_PV8HI_V2DI_QI:
+ case VOID_FTYPE_PV8HI_V8SI_QI:
+ case VOID_FTYPE_PV8HI_V4SI_QI:
+ case VOID_FTYPE_PV16QI_V4DI_QI:
+ case VOID_FTYPE_PV16QI_V2DI_QI:
+ case VOID_FTYPE_PV16QI_V8SI_QI:
+ case VOID_FTYPE_PV16QI_V4SI_QI:
+ case VOID_FTYPE_PV8HI_V8HI_QI:
+ case VOID_FTYPE_PV16HI_V16HI_HI:
+ case VOID_FTYPE_PV32HI_V32HI_SI:
+ case VOID_FTYPE_PV16QI_V16QI_HI:
+ case VOID_FTYPE_PV32QI_V32QI_SI:
+ case VOID_FTYPE_PV64QI_V64QI_DI:
+ case VOID_FTYPE_PV4DF_V4DF_QI:
+ case VOID_FTYPE_PV2DF_V2DF_QI:
+ case VOID_FTYPE_PV8SF_V8SF_QI:
+ case VOID_FTYPE_PV4SF_V4SF_QI:
nargs = 2;
klass = store;
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
break;
+ case V4SF_FTYPE_PCV4SF_V4SF_QI:
+ case V8SF_FTYPE_PCV8SF_V8SF_QI:
case V16SF_FTYPE_PCV16SF_V16SF_HI:
+ case V4SI_FTYPE_PCV4SI_V4SI_QI:
+ case V8SI_FTYPE_PCV8SI_V8SI_QI:
case V16SI_FTYPE_PCV16SI_V16SI_HI:
+ case V2DF_FTYPE_PCV2DF_V2DF_QI:
+ case V4DF_FTYPE_PCV4DF_V4DF_QI:
case V8DF_FTYPE_PCV8DF_V8DF_QI:
+ case V2DI_FTYPE_PCV2DI_V2DI_QI:
+ case V4DI_FTYPE_PCV4DI_V4DI_QI:
case V8DI_FTYPE_PCV8DI_V8DI_QI:
case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
case V4SF_FTYPE_PCFLOAT_V4SF_QI:
+ case V8HI_FTYPE_PCV8HI_V8HI_QI:
+ case V16HI_FTYPE_PCV16HI_V16HI_HI:
+ case V32HI_FTYPE_PCV32HI_V32HI_SI:
+ case V16QI_FTYPE_PCV16QI_V16QI_HI:
+ case V32QI_FTYPE_PCV32QI_V32QI_SI:
+ case V64QI_FTYPE_PCV64QI_V64QI_DI:
nargs = 3;
klass = load;
memory = 0;
@@ -34752,6 +37353,20 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx512f_loadv16si_mask:
case CODE_FOR_avx512f_loadv8df_mask:
case CODE_FOR_avx512f_loadv8di_mask:
+ case CODE_FOR_avx512vl_loadv8sf_mask:
+ case CODE_FOR_avx512vl_loadv8si_mask:
+ case CODE_FOR_avx512vl_loadv4df_mask:
+ case CODE_FOR_avx512vl_loadv4di_mask:
+ case CODE_FOR_avx512vl_loadv4sf_mask:
+ case CODE_FOR_avx512vl_loadv4si_mask:
+ case CODE_FOR_avx512vl_loadv2df_mask:
+ case CODE_FOR_avx512vl_loadv2di_mask:
+ case CODE_FOR_avx512bw_loadv64qi_mask:
+ case CODE_FOR_avx512vl_loadv32qi_mask:
+ case CODE_FOR_avx512vl_loadv16qi_mask:
+ case CODE_FOR_avx512bw_loadv32hi_mask:
+ case CODE_FOR_avx512vl_loadv16hi_mask:
+ case CODE_FOR_avx512vl_loadv8hi_mask:
aligned_mem = true;
break;
default:
@@ -35771,6 +38386,66 @@ addcarryx:
case IX86_BUILTIN_GATHER3ALTDIV16SI:
icode = CODE_FOR_avx512f_gatherdiv16si;
goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV2DF:
+ icode = CODE_FOR_avx512vl_gathersiv2df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV4DF:
+ icode = CODE_FOR_avx512vl_gathersiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV2DF:
+ icode = CODE_FOR_avx512vl_gatherdiv2df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV4DF:
+ icode = CODE_FOR_avx512vl_gatherdiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV4SF:
+ icode = CODE_FOR_avx512vl_gathersiv4sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV8SF:
+ icode = CODE_FOR_avx512vl_gathersiv8sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV4SF:
+ icode = CODE_FOR_avx512vl_gatherdiv4sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV8SF:
+ icode = CODE_FOR_avx512vl_gatherdiv8sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV2DI:
+ icode = CODE_FOR_avx512vl_gathersiv2di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV4DI:
+ icode = CODE_FOR_avx512vl_gathersiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV2DI:
+ icode = CODE_FOR_avx512vl_gatherdiv2di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV4DI:
+ icode = CODE_FOR_avx512vl_gatherdiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV4SI:
+ icode = CODE_FOR_avx512vl_gathersiv4si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV8SI:
+ icode = CODE_FOR_avx512vl_gathersiv8si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV4SI:
+ icode = CODE_FOR_avx512vl_gatherdiv4si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV8SI:
+ icode = CODE_FOR_avx512vl_gatherdiv8si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTSIV4DF:
+ icode = CODE_FOR_avx512vl_gathersiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTDIV8SF:
+ icode = CODE_FOR_avx512vl_gatherdiv8sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTSIV4DI:
+ icode = CODE_FOR_avx512vl_gathersiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTDIV8SI:
+ icode = CODE_FOR_avx512vl_gatherdiv8si;
+ goto gather_gen;
case IX86_BUILTIN_SCATTERSIV16SF:
icode = CODE_FOR_avx512f_scattersiv16sf;
goto scatter_gen;
@@ -35795,7 +38470,54 @@ addcarryx:
case IX86_BUILTIN_SCATTERDIV8DI:
icode = CODE_FOR_avx512f_scatterdiv8di;
goto scatter_gen;
-
+ case IX86_BUILTIN_SCATTERSIV8SF:
+ icode = CODE_FOR_avx512vl_scattersiv8sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV4SF:
+ icode = CODE_FOR_avx512vl_scattersiv4sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV4DF:
+ icode = CODE_FOR_avx512vl_scattersiv4df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV2DF:
+ icode = CODE_FOR_avx512vl_scattersiv2df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV8SF:
+ icode = CODE_FOR_avx512vl_scatterdiv8sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV4SF:
+ icode = CODE_FOR_avx512vl_scatterdiv4sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV4DF:
+ icode = CODE_FOR_avx512vl_scatterdiv4df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV2DF:
+ icode = CODE_FOR_avx512vl_scatterdiv2df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV8SI:
+ icode = CODE_FOR_avx512vl_scattersiv8si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV4SI:
+ icode = CODE_FOR_avx512vl_scattersiv4si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV4DI:
+ icode = CODE_FOR_avx512vl_scattersiv4di;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV2DI:
+ icode = CODE_FOR_avx512vl_scattersiv2di;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV8SI:
+ icode = CODE_FOR_avx512vl_scatterdiv8si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV4SI:
+ icode = CODE_FOR_avx512vl_scatterdiv4si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV4DI:
+ icode = CODE_FOR_avx512vl_scatterdiv4di;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV2DI:
+ icode = CODE_FOR_avx512vl_scatterdiv2di;
+ goto scatter_gen;
case IX86_BUILTIN_GATHERPFDPD:
icode = CODE_FOR_avx512pf_gatherpfv8sidf;
goto vec_prefetch_gen;
@@ -35859,6 +38581,8 @@ addcarryx:
emit_insn (gen_vec_extract_lo_v16si (half, op2));
op2 = half;
break;
+ case IX86_BUILTIN_GATHER3ALTSIV4DF:
+ case IX86_BUILTIN_GATHER3ALTSIV4DI:
case IX86_BUILTIN_GATHERALTSIV4DF:
case IX86_BUILTIN_GATHERALTSIV4DI:
half = gen_reg_rtx (V4SImode);
@@ -35886,6 +38610,8 @@ addcarryx:
op3 = half;
}
break;
+ case IX86_BUILTIN_GATHER3ALTDIV8SF:
+ case IX86_BUILTIN_GATHER3ALTDIV8SI:
case IX86_BUILTIN_GATHERALTDIV8SF:
case IX86_BUILTIN_GATHERALTDIV8SI:
half = gen_reg_rtx (mode0);
@@ -36021,11 +38747,13 @@ addcarryx:
target = gen_reg_rtx (V8SImode);
emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
break;
+ case IX86_BUILTIN_GATHER3DIV8SF:
case IX86_BUILTIN_GATHERDIV8SF:
if (target == NULL_RTX)
target = gen_reg_rtx (V4SFmode);
emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
break;
+ case IX86_BUILTIN_GATHER3DIV8SI:
case IX86_BUILTIN_GATHERDIV8SI:
if (target == NULL_RTX)
target = gen_reg_rtx (V4SImode);
@@ -36857,28 +39585,52 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
switch (TYPE_MODE (mem_vectype))
{
case V2DFmode:
- code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
break;
case V4DFmode:
- code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
+ else
+ code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
break;
case V2DImode:
- code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
break;
case V4DImode:
- code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
+ else
+ code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
break;
case V4SFmode:
- code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
break;
case V8SFmode:
- code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
break;
case V4SImode:
- code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
break;
case V8SImode:
- code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
break;
case V8DFmode:
if (TARGET_AVX512F)
@@ -37381,6 +40133,11 @@ inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
return true;
+ /* Between mask and general, we have moves no larger than word size. */
+ if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
+ && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
+ return true;
+
/* ??? This is a lie. We do have moves between mmx/general, and for
mmx/sse2. But by saying we need secondary memory we discourage the
register allocator from using the mmx registers unless needed. */
@@ -37686,7 +40443,8 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
if (STACK_REGNO_P (regno))
return VALID_FP_MODE_P (mode);
if (MASK_REGNO_P (regno))
- return VALID_MASK_REG_MODE (mode);
+ return (VALID_MASK_REG_MODE (mode)
+ || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
if (SSE_REGNO_P (regno))
{
/* We implement the move patterns for all vector modes into and
@@ -37703,6 +40461,15 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
|| VALID_AVX512F_SCALAR_MODE (mode)))
return true;
+ /* TODO check for QI/HI scalars. */
+ /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
+ if (TARGET_AVX512VL
+ && (mode == OImode
+ || mode == TImode
+ || VALID_AVX256_REG_MODE (mode)
+ || VALID_AVX512VL_128_REG_MODE (mode)))
+ return true;
+
/* xmm16-xmm31 are only available for AVX-512. */
if (EXT_REX_SSE_REGNO_P (regno))
return false;
@@ -39607,6 +42374,8 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
case V8SFmode:
case V8SImode:
case V2DFmode:
+ case V64QImode:
+ case V32HImode:
case V2DImode:
case V4SFmode:
case V4SImode:
@@ -39637,6 +42406,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
goto widen;
case V8HImode:
+ if (TARGET_AVX512VL)
+ return ix86_vector_duplicate_value (mode, target, val);
+
if (TARGET_SSE2)
{
struct expand_vec_perm_d dperm;
@@ -39667,6 +42439,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
goto widen;
case V16QImode:
+ if (TARGET_AVX512VL)
+ return ix86_vector_duplicate_value (mode, target, val);
+
if (TARGET_SSE2)
goto permute;
goto widen;
@@ -39696,16 +42471,19 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
case V16HImode:
case V32QImode:
- {
- enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
- rtx x = gen_reg_rtx (hvmode);
+ if (TARGET_AVX512VL)
+ return ix86_vector_duplicate_value (mode, target, val);
+ else
+ {
+ enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
+ rtx x = gen_reg_rtx (hvmode);
- ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
- gcc_assert (ok);
+ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
+ gcc_assert (ok);
- x = gen_rtx_VEC_CONCAT (mode, x, x);
- emit_insn (gen_rtx_SET (VOIDmode, target, x));
- }
+ x = gen_rtx_VEC_CONCAT (mode, x, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ }
return true;
default:
@@ -40267,8 +43045,9 @@ static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
rtx target, rtx vals)
{
- rtx ops[64], op0, op1;
+ rtx ops[64], op0, op1, op2, op3, op4, op5;
enum machine_mode half_mode = VOIDmode;
+ enum machine_mode quarter_mode = VOIDmode;
int n, i;
switch (mode)
@@ -40319,6 +43098,42 @@ half:
gen_rtx_VEC_CONCAT (mode, op0, op1)));
return;
+ case V64QImode:
+ quarter_mode = V16QImode;
+ half_mode = V32QImode;
+ goto quarter;
+
+ case V32HImode:
+ quarter_mode = V8HImode;
+ half_mode = V16HImode;
+ goto quarter;
+
+quarter:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ op0 = gen_reg_rtx (quarter_mode);
+ op1 = gen_reg_rtx (quarter_mode);
+ op2 = gen_reg_rtx (quarter_mode);
+ op3 = gen_reg_rtx (quarter_mode);
+ op4 = gen_reg_rtx (half_mode);
+ op5 = gen_reg_rtx (half_mode);
+ ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
+ n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op1,
+ &ops [n >> 2], n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op2,
+ &ops [n >> 1], n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op3,
+ &ops [(n >> 1) | (n >> 2)], n >> 3);
+ emit_insn (gen_rtx_SET (VOIDmode, op4,
+ gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
+ emit_insn (gen_rtx_SET (VOIDmode, op5,
+ gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, op4, op5)));
+ return;
+
case V16QImode:
if (!TARGET_SSE4_1)
break;
@@ -40701,6 +43516,49 @@ half:
emit_insn (gen_insert[j][i] (target, target, tmp));
return;
+ case V8DFmode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
+ force_reg (QImode, GEN_INT (1 << elt))));
+ return;
+ case V8DImode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
+ force_reg (QImode, GEN_INT (1 << elt))));
+ return;
+ case V16SFmode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
+ force_reg (HImode, GEN_INT (1 << elt))));
+ return;
+ case V16SImode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
+ force_reg (HImode, GEN_INT (1 << elt))));
+ return;
+ case V32HImode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
+ force_reg (SImode, GEN_INT (1 << elt))));
+ return;
+ case V64QImode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
+ force_reg (DImode, GEN_INT (1 << elt))));
+ return;
+
default:
break;
}
@@ -40908,6 +43766,32 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
}
break;
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ {
+ tmp = gen_reg_rtx (V16HImode);
+ if (elt < 16)
+ emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 15);
+ return;
+ }
+ break;
+
+ case V64QImode:
+ if (TARGET_AVX512BW)
+ {
+ tmp = gen_reg_rtx (V32QImode);
+ if (elt < 32)
+ emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 31);
+ return;
+ }
+ break;
+
case V16SFmode:
tmp = gen_reg_rtx (V8SFmode);
if (elt < 8)
@@ -41036,6 +43920,8 @@ emit_reduc_half (rtx dest, rtx src, int i)
GEN_INT (i / 2));
}
break;
+ case V64QImode:
+ case V32HImode:
case V16SImode:
case V16SFmode:
case V8DImode:
@@ -42525,7 +45411,12 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
if (d->one_operand_p)
return false;
- if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
+ if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
+ && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
+ ;
+ else if (TARGET_AVX512VL)
+ ;
+ else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
;
else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
;
@@ -42556,12 +45447,18 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
switch (vmode)
{
+ case V8DFmode:
+ case V16SFmode:
case V4DFmode:
case V8SFmode:
case V2DFmode:
case V4SFmode:
case V8HImode:
case V8SImode:
+ case V32HImode:
+ case V64QImode:
+ case V16SImode:
+ case V8DImode:
for (i = 0; i < nelt; ++i)
mask |= (d->perm[i] >= nelt) << i;
break;
@@ -42784,9 +45681,9 @@ static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
unsigned i, nelt, eltsz, mask;
- unsigned char perm[32];
+ unsigned char perm[64];
enum machine_mode vmode = V16QImode;
- rtx rperm[32], vperm, target, op0, op1;
+ rtx rperm[64], vperm, target, op0, op1;
nelt = d->nelt;
@@ -42875,6 +45772,17 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
return false;
}
}
+ else if (GET_MODE_SIZE (d->vmode) == 64)
+ {
+ if (!TARGET_AVX512BW)
+ return false;
+ if (vmode == V64QImode)
+ {
+ for (i = 0; i < nelt; ++i)
+ if ((d->perm[i] ^ i) & (nelt / 4))
+ return false;
+ }
+ }
else
return false;
}
@@ -42892,6 +45800,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
mask = 2 * nelt - 1;
else if (vmode == V16QImode)
mask = nelt - 1;
+ else if (vmode == V64QImode)
+ mask = nelt / 4 - 1;
else
mask = nelt / 2 - 1;
@@ -42917,6 +45827,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
else if (vmode == V32QImode)
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
+ else if (vmode == V64QImode)
+ emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
else if (vmode == V8SFmode)
emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
else
@@ -42972,12 +45884,24 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
rtx (*gen) (rtx, rtx) = NULL;
switch (d->vmode)
{
+ case V64QImode:
+ if (TARGET_AVX512BW)
+ gen = gen_avx512bw_vec_dupv64qi;
+ break;
case V32QImode:
gen = gen_avx2_pbroadcastv32qi_1;
break;
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ gen = gen_avx512bw_vec_dupv32hi;
+ break;
case V16HImode:
gen = gen_avx2_pbroadcastv16hi_1;
break;
+ case V16SImode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv16si;
+ break;
case V8SImode:
gen = gen_avx2_pbroadcastv8si_1;
break;
@@ -42987,9 +45911,21 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
case V8HImode:
gen = gen_avx2_pbroadcastv8hi;
break;
+ case V16SFmode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv16sf;
+ break;
case V8SFmode:
gen = gen_avx2_vec_dupv8sf_1;
break;
+ case V8DFmode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv8df;
+ break;
+ case V8DImode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv8di;
+ break;
/* For other modes prefer other shuffles this function creates. */
default: break;
}
@@ -43079,6 +46015,14 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
mode = V8DImode;
else if (mode == V16SFmode)
mode = V16SImode;
+ else if (mode == V4DFmode)
+ mode = V4DImode;
+ else if (mode == V2DFmode)
+ mode = V2DImode;
+ else if (mode == V8SFmode)
+ mode = V8SImode;
+ else if (mode == V4SFmode)
+ mode = V4SImode;
for (i = 0; i < nelt; ++i)
vec[i] = GEN_INT (d->perm[i]);
rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
@@ -44622,6 +47566,16 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
/* Try sequences of two instructions. */
+ /* ix86_expand_vec_perm_vpermi2 is also called from
+ * ix86_expand_vec_perm. So it doesn't take d as parameter.
+ * Construct needed params. */
+ rtx vec[64];
+ int i;
+ for (i = 0; i < d->nelt; ++i)
+ vec[i] = GEN_INT (d->perm[i]);
+ rtx sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, vec));
+ if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, sel, d->op1))
+ return true;
if (expand_vec_perm_pshuflw_pshufhw (d))
return true;
@@ -44796,7 +47750,8 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
/* Given sufficient ISA support we can just return true here
for selected vector modes. */
if (d.vmode == V16SImode || d.vmode == V16SFmode
- || d.vmode == V8DFmode || d.vmode == V8DImode)
+ || d.vmode == V8DFmode || d.vmode == V8DImode
+ || d.vmode == V32HImode || d.vmode == V64QImode)
/* All implementable with a single vpermi2 insn. */
return true;
if (GET_MODE_SIZE (d.vmode) == 16)
@@ -44929,6 +47884,11 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
gen_il = gen_avx2_interleave_lowv32qi;
gen_ih = gen_avx2_interleave_highv32qi;
break;
+ case V64QImode:
+ himode = V32HImode;
+ gen_il = gen_avx512bw_interleave_lowv64qi;
+ gen_ih = gen_avx512bw_interleave_highv64qi;
+ break;
default:
gcc_unreachable ();
}
@@ -44989,7 +47949,7 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
/* For SSE2, we used an full interleave, so the desired
results are in the even elements. */
- for (i = 0; i < 32; ++i)
+ for (i = 0; i < 64; ++i)
d.perm[i] = i * 2;
}
else
@@ -44997,7 +47957,7 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
/* For AVX, the interleave used above was not cross-lane. So the
extraction is evens but with the second and third quarter swapped.
Happily, that is even one insn shorter than even extraction. */
- for (i = 0; i < 32; ++i)
+ for (i = 0; i < 64; ++i)
d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
}
@@ -45195,6 +48155,10 @@ ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
case V16QImode:
case V32QImode:
+/* TODO why handle hi here and not in hi case*/
+ case V32HImode:
+ case V16SImode:
+ case V64QImode:
t1 = gen_reg_rtx (wmode);
t2 = gen_reg_rtx (wmode);
ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
@@ -45249,7 +48213,13 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
enum machine_mode mode = GET_MODE (op0);
rtx t1, t2, t3, t4, t5, t6;
- if (TARGET_XOP && mode == V2DImode)
+ if (TARGET_AVX512DQ && mode == V8DImode)
+ emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
+ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
+ emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
+ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
+ emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
+ else if (TARGET_XOP && mode == V2DImode)
{
/* op1: A,B,C,D, op2: E,F,G,H */
op1 = gen_lowpart (V4SImode, op1);
@@ -46456,9 +49426,11 @@ ix86_preferred_simd_mode (enum machine_mode mode)
switch (mode)
{
case QImode:
- return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
+ return TARGET_AVX512BW ? V64QImode :
+ (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
case HImode:
- return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
+ return TARGET_AVX512BW ? V32HImode :
+ (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
case SImode:
return TARGET_AVX512F ? V16SImode :
(TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9e3ef9424c3..c2f0ceed45f 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -71,6 +71,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_AVX512ER_P(x) TARGET_ISA_AVX512ER_P(x)
#define TARGET_AVX512CD TARGET_ISA_AVX512CD
#define TARGET_AVX512CD_P(x) TARGET_ISA_AVX512CD_P(x)
+#define TARGET_AVX512DQ TARGET_ISA_AVX512DQ
+#define TARGET_AVX512DQ_P(x) TARGET_ISA_AVX512DQ_P(x)
+#define TARGET_AVX512BW TARGET_ISA_AVX512BW
+#define TARGET_AVX512BW_P(x) TARGET_ISA_AVX512BW_P(x)
+#define TARGET_AVX512VL TARGET_ISA_AVX512VL
+#define TARGET_AVX512VL_P(x) TARGET_ISA_AVX512VL_P(x)
#define TARGET_FMA TARGET_ISA_FMA
#define TARGET_FMA_P(x) TARGET_ISA_FMA_P(x)
#define TARGET_SSE4A TARGET_ISA_SSE4A
@@ -1048,7 +1054,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
applied to them. */
#define HARD_REGNO_NREGS(REGNO, MODE) \
- (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) \
+ || MMX_REGNO_P (REGNO) || MASK_REGNO_P (REGNO) \
? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
: ((MODE) == XFmode \
? (TARGET_64BIT ? 2 : 3) \
@@ -1079,7 +1086,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_AVX512F_REG_MODE(MODE) \
((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \
- || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode)
+ || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \
+ || (MODE) == V4TImode)
+
+#define VALID_AVX512VL_128_REG_MODE(MODE) \
+ ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \
+ || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode)
#define VALID_SSE2_REG_MODE(MODE) \
((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
@@ -1126,6 +1138,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
+#define VALID_MASK_AVX512BW_MODE(MODE) ((MODE) == SImode || (MODE) == DImode)
+
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
#define HARD_REGNO_MODE_OK(REGNO, MODE) \
@@ -1448,6 +1462,7 @@ enum reg_class
: (N) <= LAST_REX_SSE_REG ? (FIRST_REX_SSE_REG + (N) - 8) \
: (FIRST_EXT_REX_SSE_REG + (N) - 16))
+#define MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
#define MASK_REGNO_P(N) IN_RANGE ((N), FIRST_MASK_REG, LAST_MASK_REG)
#define ANY_MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3cb8b672515..2a10862fec8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -740,7 +740,8 @@
;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
- avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f"
+ avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
+ fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq"
(const_string "base"))
(define_attr "enabled" ""
@@ -771,6 +772,10 @@
(eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
(eq_attr "isa" "fma_avx512f")
(symbol_ref "TARGET_FMA || TARGET_AVX512F")
+ (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
+ (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
+ (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
+ (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
]
(const_int 1)))
@@ -876,6 +881,11 @@
;; Used in signed and unsigned fix.
(define_code_iterator any_fix [fix unsigned_fix])
(define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
+(define_code_attr ufix_bool [(fix "false") (unsigned_fix "true")])
+
+;; Used in signed and unsigned float.
+(define_code_iterator any_float [float unsigned_float])
+(define_code_attr floatsuffix [(float "") (unsigned_float "u")])
;; All integer modes.
(define_mode_iterator SWI1248x [QI HI SI DI])
@@ -952,6 +962,9 @@
;; Instruction suffix for integer modes.
(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+;; Instruction suffix for masks.
+(define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])
+
;; Pointer size prefix for integer modes (Intel asm dialect)
(define_mode_attr iptrsize [(QI "BYTE")
(HI "WORD")
@@ -1048,7 +1061,7 @@
(V4SF "ps") (V2DF "pd")
(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
(V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
- (V64QI "b") (V16SI "d") (V8DI "q")])
+ (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])
;; SSE vector suffix for floating point modes
(define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
@@ -1912,8 +1925,8 @@
(set_attr "mode" "XI")])
(define_insn "*movoi_internal_avx"
- [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x ,m")
- (match_operand:OI 1 "vector_move_operand" "C ,xm,x"))]
+ [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m")
+ (match_operand:OI 1 "vector_move_operand" "C ,vm,v"))]
"TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -1927,6 +1940,8 @@
{
if (get_attr_mode (insn) == MODE_V8SF)
return "vmovups\t{%1, %0|%0, %1}";
+ else if (get_attr_mode (insn) == MODE_XI)
+ return "vmovdqu32\t{%1, %0|%0, %1}";
else
return "vmovdqu\t{%1, %0|%0, %1}";
}
@@ -1934,6 +1949,8 @@
{
if (get_attr_mode (insn) == MODE_V8SF)
return "vmovaps\t{%1, %0|%0, %1}";
+ else if (get_attr_mode (insn) == MODE_XI)
+ return "vmovdqa32\t{%1, %0|%0, %1}";
else
return "vmovdqa\t{%1, %0|%0, %1}";
}
@@ -1945,7 +1962,10 @@
[(set_attr "type" "sselog1,ssemov,ssemov")
(set_attr "prefix" "vex")
(set (attr "mode")
- (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (cond [(ior (match_operand 0 "ext_sse_reg_operand")
+ (match_operand 1 "ext_sse_reg_operand"))
+ (const_string "XI")
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V8SF")
(and (eq_attr "alternative" "2")
(match_test "TARGET_SSE_TYPELESS_STORES"))
@@ -1954,8 +1974,8 @@
(const_string "OI")))])
(define_insn "*movti_internal"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,x,x ,m")
- (match_operand:TI 1 "general_operand" "riFo,re,C,xm,x"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,m")
+ (match_operand:TI 1 "general_operand" "riFo,re,C,vm,v"))]
"(TARGET_64BIT || TARGET_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
@@ -1975,6 +1995,8 @@
{
if (get_attr_mode (insn) == MODE_V4SF)
return "%vmovups\t{%1, %0|%0, %1}";
+ else if (get_attr_mode (insn) == MODE_XI)
+ return "vmovdqu32\t{%1, %0|%0, %1}";
else
return "%vmovdqu\t{%1, %0|%0, %1}";
}
@@ -1982,6 +2004,8 @@
{
if (get_attr_mode (insn) == MODE_V4SF)
return "%vmovaps\t{%1, %0|%0, %1}";
+ else if (get_attr_mode (insn) == MODE_XI)
+ return "vmovdqa32\t{%1, %0|%0, %1}";
else
return "%vmovdqa\t{%1, %0|%0, %1}";
}
@@ -1997,7 +2021,10 @@
(const_string "maybe_vex")
(const_string "orig")))
(set (attr "mode")
- (cond [(eq_attr "alternative" "0,1")
+ (cond [(ior (match_operand 0 "ext_sse_reg_operand")
+ (match_operand 1 "ext_sse_reg_operand"))
+ (const_string "XI")
+ (eq_attr "alternative" "0,1")
(const_string "DI")
(ior (not (match_test "TARGET_SSE2"))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
@@ -2022,13 +2049,16 @@
(define_insn "*movdi_internal"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi")
+ "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r ,*m")
(match_operand:DI 1 "general_operand"
- "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn"))]
+ "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn ,*r ,*km,*k,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
{
+ case TYPE_MSKMOV:
+ return "kmovq\t{%1, %0|%0, %1}";
+
case TYPE_MULTI:
return "#";
@@ -2099,7 +2129,7 @@
[(set (attr "isa")
(cond [(eq_attr "alternative" "0,1")
(const_string "nox64")
- (eq_attr "alternative" "2,3,4,5,10,11,16,18")
+ (eq_attr "alternative" "2,3,4,5,10,11,16,18,21,23")
(const_string "x64")
(eq_attr "alternative" "17")
(const_string "x64_sse4")
@@ -2118,6 +2148,8 @@
(const_string "ssemov")
(eq_attr "alternative" "19,20")
(const_string "ssecvt")
+ (eq_attr "alternative" "21,22,23,24")
+ (const_string "mskmov")
(match_operand 1 "pic_32bit_operand")
(const_string "lea")
]
@@ -2179,16 +2211,20 @@
[(set (match_operand:DI 0 "nonimmediate_operand")
(match_operand:DI 1 "general_operand"))]
"!TARGET_64BIT && reload_completed
- && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))
- && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
+ && !(MMX_REG_P (operands[0])
+ || SSE_REG_P (operands[0])
+ || MASK_REG_P (operands[0]))
+ && !(MMX_REG_P (operands[1])
+ || SSE_REG_P (operands[1])
+ || MASK_REG_P (operands[1]))"
[(const_int 0)]
"ix86_split_long_move (operands); DONE;")
(define_insn "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi")
+ "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm")
(match_operand:SI 1 "general_operand"
- "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))]
+ "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2199,6 +2235,9 @@
return standard_sse_constant_opcode (insn, operands[1]);
+ case TYPE_MSKMOV:
+ return "kmovd\t{%1, %0|%0, %1}";
+
case TYPE_SSEMOV:
switch (get_attr_mode (insn))
{
@@ -2262,6 +2301,8 @@
(const_string "sselog1")
(eq_attr "alternative" "7,8,9,10,12")
(const_string "ssemov")
+ (eq_attr "alternative" "13,14")
+ (const_string "mskmov")
(match_operand 1 "pic_32bit_operand")
(const_string "lea")
]
@@ -2410,9 +2451,12 @@
case TYPE_MSKMOV:
switch (which_alternative)
{
- case 7: return "kmovw\t{%k1, %0|%0, %k1}";
- case 8: return "kmovw\t{%1, %0|%0, %1}";
- case 9: return "kmovw\t{%1, %k0|%k0, %1}";
+ case 7: return TARGET_AVX512DQ ? "kmovb\t{%k1, %0|%0, %k1}"
+ : "kmovw\t{%k1, %0|%0, %k1}";
+ case 8: return TARGET_AVX512DQ ? "kmovb\t{%1, %0|%0, %1}"
+ : "kmovw\t{%1, %0|%0, %1}";
+ case 9: return TARGET_AVX512DQ ? "kmovb\t{%1, %k0|%k0, %1}"
+ : "kmovw\t{%1, %k0|%k0, %1}";
default: gcc_unreachable ();
}
@@ -7490,21 +7534,45 @@
})
(define_split
- [(set (match_operand:SWI12 0 "mask_reg_operand")
- (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand")
- (match_operand:SWI12 2 "mask_reg_operand")))
+ [(set (match_operand:SWI1248x 0 "mask_reg_operand")
+ (any_logic:SWI1248x (match_operand:SWI1248x 1 "mask_reg_operand")
+ (match_operand:SWI1248x 2 "mask_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_AVX512F && reload_completed"
+;;TODO removed avx512f check because mask_reg implies it.
+ "reload_completed"
[(set (match_dup 0)
- (any_logic:SWI12 (match_dup 1)
- (match_dup 2)))])
+ (any_logic:SWI1248x (match_dup 1)
+ (match_dup 2)))])
-(define_insn "*k<logic><mode>"
- [(set (match_operand:SWI12 0 "mask_reg_operand" "=k")
- (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand" "k")
- (match_operand:SWI12 2 "mask_reg_operand" "k")))]
+(define_insn "*k<logic>qi"
+ [(set (match_operand:QI 0 "mask_reg_operand" "=k")
+ (any_logic:QI (match_operand:QI 1 "mask_reg_operand" "k")
+ (match_operand:QI 2 "mask_reg_operand" "k")))]
+ "TARGET_AVX512F"
+{
+ return TARGET_AVX512DQ ? "k<logic>b\t{%2, %1, %0|%0, %1, %2}"
+ : "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "mode" "QI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+(define_insn "*k<logic>hi"
+ [(set (match_operand:HI 0 "mask_reg_operand" "=k")
+ (any_logic:HI (match_operand:HI 1 "mask_reg_operand" "k")
+ (match_operand:HI 2 "mask_reg_operand" "k")))]
"TARGET_AVX512F"
"k<logic>w\t{%2, %1, %0|%0, %1, %2}";
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+(define_insn "*k<logic><mode>"
+ [(set (match_operand:SWI48x 0 "mask_reg_operand" "=k")
+ (any_logic:SWI48x (match_operand:SWI48x 1 "mask_reg_operand" "k")
+ (match_operand:SWI48x 2 "mask_reg_operand" "k")))]
+ "TARGET_AVX512BW"
+ "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
[(set_attr "mode" "<MODE>")
(set_attr "type" "msklog")
(set_attr "prefix" "vex")])
@@ -7560,10 +7628,10 @@
})
(define_insn "*anddi_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,!k")
(and:DI
- (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
- (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
+ (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L,k")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
{
@@ -7572,6 +7640,9 @@
case TYPE_IMOVX:
return "#";
+ case TYPE_MSKLOG:
+ return "kandq\t{%2, %1, %0|%0, %1, %2}";
+
default:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (get_attr_mode (insn) == MODE_SI)
@@ -7580,8 +7651,8 @@
return "and{q}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "type" "alu,alu,alu,imovx")
- (set_attr "length_immediate" "*,*,*,0")
+ [(set_attr "type" "alu,alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,*,0,0")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -7589,12 +7660,12 @@
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "SI,DI,DI,SI")])
+ (set_attr "mode" "SI,DI,DI,SI,DI")])
(define_insn "*andsi_1"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya")
- (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
- (match_operand:SI 2 "x86_64_general_operand" "re,rm,L")))
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya,!k")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm,k")
+ (match_operand:SI 2 "x86_64_general_operand" "re,rm,L,k")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (AND, SImode, operands)"
{
@@ -7603,12 +7674,15 @@
case TYPE_IMOVX:
return "#";
+ case TYPE_MSKLOG:
+ return "kandd\t{%2, %1, %0|%0, %1, %2}";
+
default:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
return "and{l}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "type" "alu,alu,imovx")
+ [(set_attr "type" "alu,alu,imovx,msklog")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -7616,7 +7690,7 @@
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "length_immediate" "*,*,0")
+ (set_attr "length_immediate" "*,*,0,0")
(set_attr "mode" "SI")])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
@@ -7668,11 +7742,21 @@
(match_operand:QI 2 "general_operand" "qn,qmn,rn,k")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (AND, QImode, operands)"
- "@
- and{b}\t{%2, %0|%0, %2}
- and{b}\t{%2, %0|%0, %2}
- and{l}\t{%k2, %k0|%k0, %k2}
- kandw\t{%2, %1, %0|%0, %1, %2}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "and{b}\t{%2, %0|%0, %2}";
+ case 2:
+ return "and{l}\t{%k2, %k0|%k0, %k2}";
+ case 3:
+ return TARGET_AVX512DQ ? "kandb\t{%2, %1, %0|%0, %1, %2}"
+ : "kandw\t{%2, %1, %0|%0, %1, %2}";
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "type" "alu,alu,alu,msklog")
(set_attr "mode" "QI,QI,SI,HI")])
@@ -7695,10 +7779,22 @@
(match_operand:SWI12 2 "register_operand" "r,r,k")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_AVX512F"
- "@
- andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
- #
- kandnw\t{%2, %1, %0|%0, %1, %2}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "andn\t{%k2, %k1, %k0|%k0, %k1, %k2}";
+ case 1:
+ return "#";
+ case 2:
+ if (TARGET_AVX512DQ && <MODE>mode == QImode)
+ return "kandnb\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "kandnw\t{%2, %1, %0|%0, %1, %2}";
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "isa" "bmi,*,avx512f")
(set_attr "type" "bitmanip,*,msklog")
(set_attr "prefix" "*,*,vex")
@@ -8062,14 +8158,17 @@
"ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
(define_insn "*<code><mode>_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm")
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,k")
(any_or:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>")))
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,k")
+ (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>,k")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "@
+ <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "alu,alu,msklog")
(set_attr "mode" "<MODE>")])
(define_insn "*<code>hi_1"
@@ -8157,19 +8256,36 @@
(match_operand:SWI12 2 "register_operand" "r,k"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_AVX512F"
+{
+ if (which_alternative == 0)
+ return "#";
+ if (<MODE>mode == QImode && TARGET_AVX512DQ)
+ return "kxnorb\t{%2, %1, %0|%0, %1, %2}";
+ return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "*,msklog")
+ (set_attr "prefix" "*,vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "kxnor<mode>"
+ [(set (match_operand:SWI48x 0 "register_operand" "=r,!k")
+ (not:SWI48x
+ (xor:SWI48x
+ (match_operand:SWI48x 1 "register_operand" "0,k")
+ (match_operand:SWI48x 2 "register_operand" "r,k"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW"
"@
#
- kxnorw\t{%2, %1, %0|%0, %1, %2}"
+ kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "*,msklog")
(set_attr "prefix" "*,vex")
(set_attr "mode" "<MODE>")])
(define_split
- [(set (match_operand:SWI12 0 "general_reg_operand")
- (not:SWI12
- (xor:SWI12
+ [(set (match_operand:SWI1248x 0 "general_reg_operand")
+ (not:SWI1248x
+ (xor:SWI1248x
(match_dup 0)
- (match_operand:SWI12 1 "general_reg_operand"))))
+ (match_operand:SWI1248x 1 "general_reg_operand"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_AVX512F && reload_completed"
[(parallel [(set (match_dup 0)
@@ -8179,6 +8295,8 @@
(set (match_dup 0)
(not:HI (match_dup 0)))])
+;; There are kortest[bdq] instructions but no intrinsics for them.
+;; We probably don't need to implement them.
(define_insn "kortestzhi"
[(set (reg:CCZ FLAGS_REG)
(compare:CCZ
@@ -8218,6 +8336,28 @@
(set_attr "type" "msklog")
(set_attr "prefix" "vex")])
+(define_insn "kunpcksi"
+ [(set (match_operand:SI 0 "register_operand" "=k")
+ (ior:SI
+ (ashift:SI
+ (match_operand:SI 1 "register_operand" "k")
+ (const_int 16))
+ (zero_extend:SI (subreg:HI (match_operand:SI 2 "register_operand" "k") 0))))]
+ "TARGET_AVX512BW"
+ "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "SI")])
+
+(define_insn "kunpckdi"
+ [(set (match_operand:DI 0 "register_operand" "=k")
+ (ior:DI
+ (ashift:DI
+ (match_operand:DI 1 "register_operand" "k")
+ (const_int 32))
+ (zero_extend:DI (subreg:SI (match_operand:DI 2 "register_operand" "k") 0))))]
+ "TARGET_AVX512BW"
+ "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "DI")])
+
;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; ??? Special case for immediate operand is missing - it is tricky.
(define_insn "*<code>si_2_zext"
@@ -8817,11 +8957,15 @@
"ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
(define_insn "*one_cmpl<mode>2_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
- (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))]
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,k")
+ (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,k")))]
"ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
- "not{<imodesuffix>}\t%0"
- [(set_attr "type" "negnot")
+ "@
+ not{<imodesuffix>}\t%0
+ knot<mskmodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,avx512bw")
+ (set_attr "type" "negnot,msklog")
+ (set_attr "prefix" "*,vex")
(set_attr "mode" "<MODE>")])
(define_insn "*one_cmplhi2_1"
@@ -8841,10 +8985,21 @@
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!k")
(not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
"ix86_unary_operator_ok (NOT, QImode, operands)"
- "@
- not{b}\t%0
- not{l}\t%k0
- knotw\t{%1, %0|%0, %1}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "not{b}\t%0";
+ case 1:
+ return "not{l}\t%k0";
+ case 2:
+ if (TARGET_AVX512DQ)
+ return "knotb\t{%1, %0|%0, %1}";
+ return "knotw\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "isa" "*,*,avx512f")
(set_attr "type" "negnot,negnot,msklog")
(set_attr "prefix" "*,*,vex")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index dc1302c5a01..9208b766030 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -641,6 +641,18 @@ mavx512cd
Target Report Mask(ISA_AVX512CD) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512CD built-in functions and code generation
+mavx512dq
+Target Report Mask(ISA_AVX512DQ) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512DQ built-in functions and code generation
+
+mavx512bw
+Target Report Mask(ISA_AVX512BW) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512BW built-in functions and code generation
+
+mavx512vl
+Target Report Mask(ISA_AVX512VL) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512VL built-in functions and code generation
+
mfma
Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 73b48599277..5d921822248 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -50,6 +50,16 @@
#include <avx512cdintrin.h>
+#include <avx512vlintrin.h>
+
+#include <avx512bwintrin.h>
+
+#include <avx512dqintrin.h>
+
+#include <avx512vlbwintrin.h>
+
+#include <avx512vldqintrin.h>
+
#include <shaintrin.h>
#include <lzcntintrin.h>
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f667362b6f7..a0736e812f5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -128,6 +128,21 @@
UNSPEC_SHA256MSG1
UNSPEC_SHA256MSG2
UNSPEC_SHA256RNDS2
+
+ ;; For AVX512BW support
+ UNSPEC_DBPSADBW
+ UNSPEC_PMADDUBSW512
+ UNSPEC_PMADDWD512
+ UNSPEC_PSHUFHW
+ UNSPEC_PSHUFLW
+ UNSPEC_CVTINT2MASK
+
+ ;; For AVX512DQ support
+ UNSPEC_REDUCE
+ UNSPEC_FPCLASS
+ UNSPEC_FPCLASS_SCALAR
+ UNSPEC_RANGE
+ UNSPEC_RANGE_SCALAR
])
(define_c_enum "unspecv" [
@@ -146,10 +161,21 @@
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
- (V2TI "TARGET_AVX") V1TI
+ (V4TI "TARGET_AVX") (V2TI "TARGET_AVX") V1TI
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
+;; All AVX512VL vector modes
+(define_mode_iterator V_AVX512VL
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
;; All vector modes
(define_mode_iterator V
[(V32QI "TARGET_AVX") V16QI
@@ -194,6 +220,9 @@
(define_mode_iterator VF1_128_256
[(V8SF "TARGET_AVX") V4SF])
+(define_mode_iterator VF1_128_256VL
+ [V8SF (V4SF "TARGET_AVX512VL")])
+
;; All DFmode vector float modes
(define_mode_iterator VF2
[(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
@@ -205,6 +234,9 @@
(define_mode_iterator VF2_512_256
[(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
+(define_mode_iterator VF2_512_256VL
+ [V8DF (V4DF "TARGET_AVX512VL")])
+
;; All 128bit vector float modes
(define_mode_iterator VF_128
[V4SF (V2DF "TARGET_SSE2")])
@@ -217,17 +249,33 @@
(define_mode_iterator VF_512
[V16SF V8DF])
+(define_mode_iterator VI_AVX512VL
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL") (V16QI "TARGET_AVX512VL")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
+ V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VF_AVX512VL
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
+(define_mode_iterator VF2_AVX512VL
+ [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
+(define_mode_iterator VF1_AVX512VL
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
+
;; All vector integer modes
(define_mode_iterator VI
[(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
- (V32QI "TARGET_AVX") V16QI
- (V16HI "TARGET_AVX") V8HI
+ (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
(V8SI "TARGET_AVX") V4SI
(V4DI "TARGET_AVX") V2DI])
(define_mode_iterator VI_AVX2
- [(V32QI "TARGET_AVX2") V16QI
- (V16HI "TARGET_AVX2") V8HI
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
@@ -236,18 +284,33 @@
[(V32QI "TARGET_AVX") V16QI])
(define_mode_iterator VI_UNALIGNED_LOADSTORE
- [(V32QI "TARGET_AVX") V16QI
- (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW")
+ (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; All DImode vector integer modes
(define_mode_iterator VI8
[(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
+(define_mode_iterator VI8_AVX512VL
+ [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI8_256_512
+ [V8DI (V4DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI128_256
+ [(V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")])
+
(define_mode_iterator VI1_AVX2
- [(V32QI "TARGET_AVX2") V16QI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI2_AVX2
- [(V16HI "TARGET_AVX2") V8HI])
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX512F
[(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
@@ -261,9 +324,20 @@
(define_mode_iterator VI4_AVX512F
[(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
-(define_mode_iterator VI48_AVX512F
- [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F")])
+(define_mode_iterator VI4_AVX512VL
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI248_AVX512
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") (V4SI "TARGET_AVX2")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI48_AVX512VL
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI8_AVX2_AVX512BW
+ [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX2
[(V4DI "TARGET_AVX2") V2DI])
@@ -271,6 +345,12 @@
(define_mode_iterator VI8_AVX2_AVX512F
[(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
+(define_mode_iterator VI4_128_8_256
+ [V4SI V4DI])
+
+(define_mode_iterator VI2_128_4_256
+ [V8HI V8SI])
+
;; All V8D* modes
(define_mode_iterator V8FI
[V8DF V8DI])
@@ -281,26 +361,20 @@
;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2
- [(V2TI "TARGET_AVX2") V1TI])
+ [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
;; ??? This should probably be dropped in favor of VIMAX_AVX2.
(define_mode_iterator SSESCALARMODE
- [(V2TI "TARGET_AVX2") TI])
+ [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
(define_mode_iterator VI12_AVX2
- [(V32QI "TARGET_AVX2") V16QI
- (V16HI "TARGET_AVX2") V8HI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI24_AVX2
- [(V16HI "TARGET_AVX2") V8HI
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI])
-(define_mode_iterator VI124_AVX2_48_AVX512F
- [(V32QI "TARGET_AVX2") V16QI
- (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F")])
-
(define_mode_iterator VI124_AVX512F
[(V32QI "TARGET_AVX2") V16QI
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
@@ -312,7 +386,7 @@
(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI248_AVX2
- [(V16HI "TARGET_AVX2") V8HI
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI
(V4DI "TARGET_AVX2") V2DI])
@@ -321,9 +395,10 @@
(V8SI "TARGET_AVX2") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
-(define_mode_iterator VI48_AVX2_48_AVX512F
- [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
+(define_mode_iterator VI248_AVX512BW
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") (V4SI "TARGET_AVX2")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") (V2DI "TARGET_AVX2")])
(define_mode_iterator V48_AVX2
[V4SF V2DF
@@ -331,32 +406,41 @@
(V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
+(define_mode_attr avx512
+ [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
+ (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
+ (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
+ (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
+ (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
+ (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
+
(define_mode_attr sse2_avx_avx512f
[(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
+ (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
(V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
- (V8DI "avx512f")
+ (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
(V16SF "avx512f") (V8SF "avx") (V4SF "avx")
(V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
(define_mode_attr sse2_avx2
- [(V16QI "sse2") (V32QI "avx2")
- (V8HI "sse2") (V16HI "avx2")
+ [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
+ (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
(V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
(V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
- (V1TI "sse2") (V2TI "avx2")])
+ (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
(define_mode_attr ssse3_avx2
- [(V16QI "ssse3") (V32QI "avx2")
- (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
+ [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
+ (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
(V4SI "ssse3") (V8SI "avx2")
(V2DI "ssse3") (V4DI "avx2")
- (TI "ssse3") (V2TI "avx2")])
+ (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
(define_mode_attr sse4_1_avx2
- [(V16QI "sse4_1") (V32QI "avx2")
- (V8HI "sse4_1") (V16HI "avx2")
+ [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
+ (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
(V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
- (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
+ (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
(define_mode_attr avx_avx2
[(V4SF "avx") (V2DF "avx")
@@ -370,11 +454,12 @@
(V4SI "vec") (V8SI "avx2")
(V2DI "vec") (V4DI "avx2")])
-(define_mode_attr avx2_avx512f
+(define_mode_attr avx2_avx512bw
[(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
(V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
- (V8SF "avx2") (V16SF "avx512f")
- (V4DF "avx2") (V8DF "avx512f")])
+ (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
+ (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
+ (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
(define_mode_attr shuffletype
[(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
@@ -386,13 +471,19 @@
(define_mode_attr ssequartermode
[(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
+(define_mode_attr ssedoublemodelower
+ [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
+ (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
+ (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
+
(define_mode_attr ssedoublemode
[(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
- (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
- (V32QI "V32HI") (V16QI "V16HI")])
+ (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
+ (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
+ (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
(define_mode_attr ssebytemode
- [(V4DI "V32QI") (V2DI "V16QI")])
+ [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
;; All 128bit vector integer modes
(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
@@ -413,11 +504,15 @@
(define_mode_iterator VI48_128 [V4SI V2DI])
;; Various 256bit and 512 vector integer mode combinations
-(define_mode_iterator VI124_256_48_512
- [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
+(define_mode_iterator VI124_256_1248_512
+ [V32QI V16HI V8SI (V8DI "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V64QI "TARGET_AVX512BW")
+ (V32HI "TARGET_AVX512BW")])
(define_mode_iterator VI48_256 [V8SI V4DI])
(define_mode_iterator VI48_512 [V16SI V8DI])
(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
+(define_mode_iterator VI512_48F_12BW
+ [V16SI V8DI (V64QI "TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")])
;; Int-float size matches
(define_mode_iterator VI4F_128 [V4SI V4SF])
@@ -426,11 +521,34 @@
(define_mode_iterator VI8F_256 [V4DI V4DF])
(define_mode_iterator VI8F_256_512
[V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
-(define_mode_iterator VI48F_256_512
+(define_mode_iterator VI48F_256_512_2I
[V8SI V8SF
+ (V32HI "TARGET_AVX512BW")
+ (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V4DF "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
+(define_mode_iterator VI248F
+ [(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
+ (V4SF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+ (V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ (V32HI "TARGET_AVX512BW")
+ (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
(V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
-(define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
+(define_mode_iterator VI48F_I12B_512
+ [V16SI V16SF V8DI V8DF
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
+(define_mode_iterator VI48F
+ [V16SI V16SF V8DI V8DF
+ (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
+ (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+ (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
;; Mapping from float mode to required SSE level
(define_mode_attr sse
@@ -461,7 +579,7 @@
;; SSE instruction mode
(define_mode_attr sseinsnmode
- [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
+ [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
(V16SF "V16SF") (V8DF "V8DF")
@@ -471,8 +589,8 @@
;; Mapping of vector modes to corresponding mask size
(define_mode_attr avx512fmaskmode
- [(V16QI "HI")
- (V16HI "HI") (V8HI "QI")
+ [(V64QI "DI") (V32QI "SI") (V16QI "HI")
+ (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
(V16SI "HI") (V8SI "QI") (V4SI "QI")
(V8DI "QI") (V4DI "QI") (V2DI "QI")
(V16SF "HI") (V8SF "QI") (V4SF "QI")
@@ -487,10 +605,15 @@
(V8SI "V8SI") (V4DI "V4DI")
(V4SI "V4SI") (V2DI "V2DI")
(V16HI "V16HI") (V8HI "V8HI")
+ (V32HI "V32HI") (V64QI "V64QI")
(V32QI "V32QI") (V16QI "V16QI")])
+(define_mode_attr sseintvecmode2
+ [(V8DF "XI") (V4DF "OI") (V2DF "TI")
+ (V8SF "OI") (V4SF "TI")])
+
(define_mode_attr sseintvecmodelower
- [(V16SF "v16si")
+ [(V16SF "v16si") (V8DF "v8di")
(V8SF "v8si") (V4DF "v4di")
(V4SF "v4si") (V2DF "v2di")
(V8SI "v8si") (V4DI "v4di")
@@ -522,10 +645,13 @@
(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
(V8SI "V8SF") (V4SI "V4SF")
(V4DI "V8SF") (V2DI "V4SF")
- (V2TI "V8SF") (V1TI "V4SF")
+ (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
(V8SF "V8SF") (V4SF "V4SF")
(V4DF "V8SF") (V2DF "V4SF")])
+(define_mode_attr ssePSmode2
+ [(V8DI "V8SF") (V4DI "V4SF")])
+
;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode
[(V64QI "QI") (V32QI "QI") (V16QI "QI")
@@ -570,6 +696,7 @@
(define_mode_attr ssescalarsize
[(V8DI "64") (V4DI "64") (V2DI "64")
+ (V64QI "8") (V32QI "8") (V16QI "8")
(V32HI "16") (V16HI "16") (V8HI "16")
(V16SI "32") (V8SI "32") (V4SI "32")
(V16SF "32") (V8DF "64")])
@@ -581,7 +708,10 @@
(V8DI "p") (V8DF "")
(V4SI "p") (V4SF "")
(V8SI "p") (V8SF "")
- (V16SI "p") (V16SF "")])
+ (V16SI "p") (V16SF "")
+ (V16QI "p") (V8HI "p")
+ (V32QI "p") (V16HI "p")
+ (V64QI "p") (V32HI "p")])
;; SSE scalar suffix for vector modes
(define_mode_attr ssescalarmodesuffix
@@ -626,9 +756,18 @@
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
+;; Mapping for dbpsadbw modes
+(define_mode_attr dbpsadbwmode
+ [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
+
;; Mapping suffixes for broadcast
(define_mode_attr bcstscalarsuff
- [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
+ [(V64QI "b") (V32QI "b") (V16QI "b")
+ (V32HI "w") (V16HI "w") (V8HI "w")
+ (V16SI "d") (V8SI "d") (V4SI "d")
+ (V8DI "q") (V4DI "q") (V2DI "q")
+ (V16SF "ss") (V8SF "ss") (V4SF "ss")
+ (V8DF "sd") (V4DF "sd") (V2DF "sd")])
;; Include define_subst patterns for instructions with mask
(include "subst.md")
@@ -669,12 +808,10 @@
case 2:
/* There is no evex-encoded vmov* for sizes smaller than 64-bytes
in avx512f, so we need to use workarounds, to access sse registers
- 16-31, which are evex-only. */
- if (TARGET_AVX512F && <MODE_SIZE> < 64
- && ((REG_P (operands[0])
- && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
- || (REG_P (operands[1])
- && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
+ 16-31, which are evex-only. In avx512vl we don't need workarounds. */
+ if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64 && !TARGET_AVX512VL
+ && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
+ || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
{
if (memory_operand (operands[0], <MODE>mode))
{
@@ -738,9 +875,11 @@
if (TARGET_AVX
&& (misaligned_operand (operands[0], <MODE>mode)
|| misaligned_operand (operands[1], <MODE>mode)))
- return "vmovdqu\t{%1, %0|%0, %1}";
+ return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
+ : "vmovdqu\t{%1, %0|%0, %1}";
else
- return "%vmovdqa\t{%1, %0|%0, %1}";
+ return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
+ : "%vmovdqa\t{%1, %0|%0, %1}";
case MODE_XI:
if (misaligned_operand (operands[0], <MODE>mode)
|| misaligned_operand (operands[1], <MODE>mode))
@@ -774,25 +913,37 @@
]
(const_string "<sseinsnmode>")))])
-(define_insn "avx512f_load<mode>_mask"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
- (vec_merge:VI48F_512
- (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
- (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
+(define_insn "<avx512>_load<mode>_mask"
+ [(set (match_operand:V_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:V_AVX512VL
+ (match_operand:V_AVX512VL 1 "nonimmediate_operand" "v,m")
+ (match_operand:V_AVX512VL 2 "vector_move_operand" "0C,0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
"TARGET_AVX512F"
{
switch (MODE_<sseinsnmode>)
{
case MODE_V8DF:
+ case MODE_V4DF:
+ case MODE_V2DF:
case MODE_V16SF:
+ case MODE_V8SF:
+ case MODE_V4SF:
if (misaligned_operand (operands[1], <MODE>mode))
return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
default:
- if (misaligned_operand (operands[1], <MODE>mode))
+      /* There is no vmovdqa8/16; use vmovdqu8/16 instead.  */
+ if (<MODE>mode == V64QImode
+ || <MODE>mode == V32QImode
+ || <MODE>mode == V16QImode
+ || <MODE>mode == V32HImode
+ || <MODE>mode == V16HImode
+ || <MODE>mode == V8HImode
+ || misaligned_operand (operands[1], <MODE>mode))
return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
- return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
+ else
+ return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
}
}
[(set_attr "type" "ssemov")
@@ -800,11 +951,11 @@
(set_attr "memory" "none,load")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_blendm<mode>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (vec_merge:VI48F_512
- (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
- (match_operand:VI48F_512 1 "register_operand" "v")
+(define_insn "<avx512>_blendm<mode>"
+ [(set (match_operand:V_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:V_AVX512VL
+ (match_operand:V_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:V_AVX512VL 1 "register_operand" "v")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
"TARGET_AVX512F"
"v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
@@ -812,10 +963,10 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_store<mode>_mask"
- [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
- (vec_merge:VI48F_512
- (match_operand:VI48F_512 1 "register_operand" "v")
+(define_insn "<avx512>_store<mode>_mask"
+ [(set (match_operand:V_AVX512VL 0 "memory_operand" "=m")
+ (vec_merge:V_AVX512VL
+ (match_operand:V_AVX512VL 1 "register_operand" "v")
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
"TARGET_AVX512F"
@@ -823,10 +974,23 @@
switch (MODE_<sseinsnmode>)
{
case MODE_V8DF:
+ case MODE_V4DF:
+ case MODE_V2DF:
case MODE_V16SF:
+ case MODE_V8SF:
+ case MODE_V4SF:
return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
default:
- return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+      /* There is no vmovdqa8/16; use vmovdqu8/16 instead.  */
+ if (<MODE>mode == V64QImode
+ || <MODE>mode == V32QImode
+ || <MODE>mode == V16QImode
+ || <MODE>mode == V32HImode
+ || <MODE>mode == V16HImode
+ || <MODE>mode == V8HImode)
+ return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ else
+ return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
}
}
[(set_attr "type" "ssemov")
@@ -1009,11 +1173,11 @@
]
(const_string "<MODE>")))])
-(define_insn "avx512f_storeu<ssemodesuffix>512_mask"
- [(set (match_operand:VF_512 0 "memory_operand" "=m")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "v")]
+(define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
+ [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
UNSPEC_STOREU)
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
@@ -1022,6 +1186,8 @@
switch (get_attr_mode (insn))
{
case MODE_V16SF:
+ case MODE_V8SF:
+ case MODE_V4SF:
return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
default:
return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
@@ -1067,16 +1233,20 @@
{
switch (get_attr_mode (insn))
{
+ case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
return "%vmovups\t{%1, %0|%0, %1}";
- case MODE_XI:
- if (<MODE>mode == V8DImode)
- return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
- else
- return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
default:
- return "%vmovdqu\t{%1, %0|%0, %1}";
+ switch (<MODE>mode)
+ {
+ case V32QImode:
+ case V16QImode:
+ if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+ return "%vmovdqu\t{%1, %0|%0, %1}";
+ default:
+ return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+ }
}
}
[(set_attr "type" "ssemov")
@@ -1112,13 +1282,16 @@
case MODE_V8SF:
case MODE_V4SF:
return "%vmovups\t{%1, %0|%0, %1}";
- case MODE_XI:
- if (<MODE>mode == V8DImode)
- return "vmovdqu64\t{%1, %0|%0, %1}";
- else
- return "vmovdqu32\t{%1, %0|%0, %1}";
default:
- return "%vmovdqu\t{%1, %0|%0, %1}";
+ switch (<MODE>mode)
+ {
+ case V32QImode:
+ case V16QImode:
+ if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+ return "%vmovdqu\t{%1, %0|%0, %1}";
+ default:
+ return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
+ }
}
}
[(set_attr "type" "ssemov")
@@ -1142,21 +1315,16 @@
]
(const_string "<sseinsnmode>")))])
-(define_insn "avx512f_storedqu<mode>_mask"
- [(set (match_operand:VI48_512 0 "memory_operand" "=m")
- (vec_merge:VI48_512
- (unspec:VI48_512
- [(match_operand:VI48_512 1 "register_operand" "v")]
+(define_insn "<avx512>_storedqu<mode>_mask"
+ [(set (match_operand:VI_AVX512VL 0 "memory_operand" "=m")
+ (vec_merge:VI_AVX512VL
+ (unspec:VI_AVX512VL
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")]
UNSPEC_STOREU)
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
"TARGET_AVX512F"
-{
- if (<MODE>mode == V8DImode)
- return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
- else
- return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-}
+ "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "memory" "store")
@@ -1443,9 +1611,9 @@
(set_attr "mode" "SF")])
(define_insn "<mask_codefor>rcp14<mode><mask_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
UNSPEC_RCP14))]
"TARGET_AVX512F"
"vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -1538,9 +1706,9 @@
(set_attr "mode" "<MODE>")])
(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
UNSPEC_RSQRT14))]
"TARGET_AVX512F"
"vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -2025,7 +2193,8 @@
[(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
- (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
+ (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
+ (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
(V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
(V8DF "TARGET_AVX512F")])
@@ -2040,9 +2209,9 @@
})
(define_expand "reduc_<code>_<mode>"
- [(umaxmin:VI48_512
- (match_operand:VI48_512 0 "register_operand")
- (match_operand:VI48_512 1 "register_operand"))]
+ [(umaxmin:VI512_48F_12BW
+ (match_operand:VI512_48F_12BW 0 "register_operand")
+ (match_operand:VI512_48F_12BW 1 "register_operand"))]
"TARGET_AVX512F"
{
ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
@@ -2069,6 +2238,34 @@
DONE;
})
+(define_insn "<mask_codefor>reducep<mode><mask_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
+ (match_operand:SI 2 "const_0_to_255_operand")]
+ UNSPEC_REDUCE))]
+ "TARGET_AVX512DQ"
+ "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "reduces<mode>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_REDUCE)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512DQ"
+ "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point comparisons
@@ -2156,14 +2353,21 @@
(set_attr "mode" "<ssescalarmode>")])
(define_mode_attr cmp_imm_predicate
- [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
- (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
-
-(define_insn "avx512f_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
+ [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
+ (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
+ (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
+ (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
+ (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
+ (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
+ (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
+ (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
+ (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
+
+(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48F_512 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ [(match_operand:V_AVX512VL 1 "register_operand" "v")
+ (match_operand:V_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP))]
"TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
@@ -2173,11 +2377,11 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_ucmp<mode>3<mask_scalar_merge_name>"
+(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")
(match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_UNSIGNED_PCMP))]
"TARGET_AVX512F"
@@ -2330,20 +2534,24 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "<sse>_andnot<mode>3"
+(define_insn "<sse>_andnot<mode>3<mask_name>"
[(set (match_operand:VF 0 "register_operand" "=x,v")
(and:VF
(not:VF
(match_operand:VF 1 "register_operand" "0,v"))
(match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE"
+ "TARGET_SSE
+ && (!<mask_applied>
+ || (TARGET_AVX512DQ && GET_MODE_SIZE (<MODE>mode) == 64)
+ || (TARGET_AVX512DQ && TARGET_AVX512VL))"
{
- static char buf[32];
+ static char buf[64];
const char *ops;
const char *suffix;
switch (get_attr_mode (insn))
{
+ case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
suffix = "ps";
@@ -2358,14 +2566,14 @@
ops = "andn%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
- ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
}
/* There is no vandnp[sd]. Use vpandnq. */
- if (<MODE_SIZE> == 64)
+ if (GET_MODE_SIZE (<MODE>mode) == 64 && !TARGET_AVX512DQ)
{
suffix = "q";
ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
@@ -2388,35 +2596,31 @@
]
(const_string "<MODE>")))])
-(define_expand "<code><mode>3"
- [(set (match_operand:VF_128_256 0 "register_operand")
- (any_logic:VF_128_256
- (match_operand:VF_128_256 1 "nonimmediate_operand")
- (match_operand:VF_128_256 2 "nonimmediate_operand")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-
-(define_expand "<code><mode>3"
- [(set (match_operand:VF_512 0 "register_operand")
- (fpint_logic:VF_512
- (match_operand:VF_512 1 "nonimmediate_operand")
- (match_operand:VF_512 2 "nonimmediate_operand")))]
- "TARGET_AVX512F"
+(define_expand "<code><mode>3<mask_name>"
+ [(set (match_operand:VF 0 "register_operand")
+ (any_logic:VF
+ (match_operand:VF 1 "nonimmediate_operand")
+ (match_operand:VF 2 "nonimmediate_operand")))]
+ "TARGET_SSE
+ && (!<mask_applied> || TARGET_AVX512VL || GET_MODE_SIZE (<MODE>mode) == 64)"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*<code><mode>3"
+(define_insn "*<code><mode>3<mask_name>"
[(set (match_operand:VF 0 "register_operand" "=x,v")
(any_logic:VF
(match_operand:VF 1 "nonimmediate_operand" "%0,v")
(match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "TARGET_SSE
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (!<mask_applied> || TARGET_AVX512VL || GET_MODE_SIZE (<MODE>mode) == 64)"
{
- static char buf[32];
+ static char buf[64];
const char *ops;
const char *suffix;
switch (get_attr_mode (insn))
{
+ case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
suffix = "ps";
@@ -2431,14 +2635,14 @@
ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
- ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
}
- /* There is no v<logic>p[sd]. Use vp<logic>q. */
- if (<MODE_SIZE> == 64)
+ /* There is no v<logic>p[sd] in avx512f. Use vp<logic>q. */
+ if (GET_MODE_SIZE (<MODE>mode) == 64 && !TARGET_AVX512DQ)
{
suffix = "q";
ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
@@ -2670,23 +2874,6 @@
]
(const_string "TI")))])
-;; There are no floating point xor for V16SF and V8DF in avx512f
-;; but we need them for negation. Instead we use int versions of
-;; xor. Maybe there could be a better way to do that.
-
-(define_mode_attr avx512flogicsuff
- [(V16SF "d") (V8DF "q")])
-
-(define_insn "avx512f_<logic><mode>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (fpint_logic:VF_512
- (match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F"
- "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")])
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FMA floating point multiply/accumulate instructions. These include
@@ -2705,10 +2892,10 @@
(define_mode_iterator FMAMODEM
[(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
(DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
- (V4SF "TARGET_FMA || TARGET_FMA4")
- (V2DF "TARGET_FMA || TARGET_FMA4")
- (V8SF "TARGET_FMA || TARGET_FMA4")
- (V4DF "TARGET_FMA || TARGET_FMA4")
+ (V4SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V2DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V8SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V4DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
(V16SF "TARGET_AVX512F")
(V8DF "TARGET_AVX512F")])
@@ -2742,14 +2929,14 @@
;; The builtins for intrinsics are not constrained by SSE math enabled.
(define_mode_iterator FMAMODE
- [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
- (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
- (V4SF "TARGET_FMA || TARGET_FMA4")
- (V2DF "TARGET_FMA || TARGET_FMA4")
- (V8SF "TARGET_FMA || TARGET_FMA4")
- (V4DF "TARGET_FMA || TARGET_FMA4")
- (V16SF "TARGET_AVX512F")
- (V8DF "TARGET_AVX512F")])
+ [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+ (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+ (V4SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V2DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V8SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V4DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V16SF "TARGET_AVX512F")
+ (V8DF "TARGET_AVX512F")])
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
@@ -2758,13 +2945,13 @@
(match_operand:FMAMODE 2 "nonimmediate_operand")
(match_operand:FMAMODE 3 "nonimmediate_operand")))])
-(define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>"
- [(match_operand:VF_512 0 "register_operand")
- (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
- (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
- (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
+(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
+ [(match_operand:VF_AVX512VL 0 "register_operand")
+ (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
{
emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
operands[0], operands[1], operands[2], operands[3],
@@ -2780,7 +2967,7 @@
(V8SF "TARGET_FMA || TARGET_FMA4")
(V4DF "TARGET_FMA || TARGET_FMA4")])
-(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fmadd_noavx512_<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE_NOVF512
(match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
@@ -2798,11 +2985,11 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (fma:VF_512
- (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -2812,16 +2999,16 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmadd_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (fma:VF_512
- (match_operand:VF_512 1 "register_operand" "0,0")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
+(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
"@
vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
@@ -2829,13 +3016,13 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmadd_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=x")
- (vec_merge:VF_512
- (fma:VF_512
- (match_operand:VF_512 1 "register_operand" "x")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (match_operand:VF_512 3 "register_operand" "0"))
+(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=x")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "x")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "register_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512F"
@@ -2844,7 +3031,7 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fmsub_noavx512<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE_NOVF512
(match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
@@ -2863,12 +3050,12 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (fma:VF_512
- (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -2878,14 +3065,14 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmsub_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (fma:VF_512
- (match_operand:VF_512 1 "register_operand" "0,0")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
+(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX512F"
@@ -2896,23 +3083,23 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmsub_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (fma:VF_512
- (match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "register_operand" "0")))
+(define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "register_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
"vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
[(set_attr "isa" "fma_avx512f")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fnmadd_noavx512_<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE_NOVF512
(neg:FMAMODE_NOVF512
@@ -2931,12 +3118,12 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -2946,17 +3133,17 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fnmadd_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "register_operand" "0,0"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
+(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
"@
vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
@@ -2964,23 +3151,23 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fnmadd_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "register_operand" "v"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (match_operand:VF_512 3 "register_operand" "0"))
+(define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "v"))
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "register_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
"vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
[(set_attr "isa" "fma_avx512f")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fnmsub_noavx512_<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE_NOVF512
(neg:FMAMODE_NOVF512
@@ -3000,13 +3187,13 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -3016,18 +3203,18 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fnmsub_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "register_operand" "0,0"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
+(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
"@
vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
@@ -3035,15 +3222,15 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fnmsub_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "register_operand" "v"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "register_operand" "0")))
+(define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "v"))
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "register_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512F"
@@ -3072,11 +3259,11 @@
UNSPEC_FMADDSUB))]
"TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
-(define_expand "avx512f_fmaddsub_<mode>_maskz<round_expand_name>"
- [(match_operand:VF_512 0 "register_operand")
- (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
- (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
- (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
+(define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
+ [(match_operand:VF_AVX512VL 0 "register_operand")
+ (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512F"
{
@@ -3086,7 +3273,7 @@
DONE;
})
-(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fmaddsub_noavx512_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
(unspec:VF_128_256
[(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
@@ -3105,11 +3292,11 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
UNSPEC_FMADDSUB))]
"TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@@ -3120,13 +3307,13 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "0,0")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")]
+(define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
UNSPEC_FMADDSUB)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
@@ -3138,13 +3325,13 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (match_operand:VF_512 3 "register_operand" "0")]
+(define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "register_operand" "0")]
UNSPEC_FMADDSUB)
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -3154,7 +3341,7 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fmsubadd_noavx512_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
(unspec:VF_128_256
[(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
@@ -3174,12 +3361,12 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
UNSPEC_FMADDSUB))]
"TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@@ -3190,14 +3377,14 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "0,0")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
+(define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
UNSPEC_FMADDSUB)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
@@ -3209,14 +3396,14 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "register_operand" "0"))]
+(define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
UNSPEC_FMADDSUB)
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -3599,15 +3786,15 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "ufloatv16siv16sf2<mask_name><round_name>"
- [(set (match_operand:V16SF 0 "register_operand" "=v")
- (unsigned_float:V16SF
- (match_operand:V16SI 1 "<round_nimm_predicate>" "<round_constraint>")))]
+(define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (unsigned_float:VF1_AVX512VL
+ (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
"TARGET_AVX512F"
"vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
- (set_attr "mode" "V16SF")])
+ (set_attr "mode" "<MODE>")])
(define_expand "floatuns<sseintvecmodelower><mode>2"
[(match_operand:VF1 0 "register_operand")
@@ -3627,13 +3814,13 @@
(define_mode_attr sf2simodelower
[(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
-(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
+(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
[(set (match_operand:VI4_AVX 0 "register_operand" "=v")
(unspec:VI4_AVX
[(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
UNSPEC_FIX_NOTRUNC))]
- "TARGET_SSE2"
- "%vcvtps2dq\t{%1, %0|%0, %1}"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
+ "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set (attr "prefix_data16")
(if_then_else
@@ -3654,16 +3841,62 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name><round_name>"
- [(set (match_operand:V16SI 0 "register_operand" "=v")
- (unspec:V16SI
- [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
+(define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
+ [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI4_AVX512VL
+ [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512F"
"vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
- (set_attr "mode" "XI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
+ [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
+ (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && <round_mode512bit_condition>"
+ "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI
+ [(vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
+ [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
+ (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && <round_mode512bit_condition>"
+ "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI
+ [(vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)]))]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
@@ -3675,20 +3908,20 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "fix_truncv8sfv8si2"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
- (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX"
- "vcvttps2dq\t{%1, %0|%0, %1}"
+(define_insn "fix_truncv8sfv8si2<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
+ (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "OI")])
-(define_insn "fix_truncv4sfv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "%vcvttps2dq\t{%1, %0|%0, %1}"
+(define_insn "fix_truncv4sfv4si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
+ "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set (attr "prefix_rep")
(if_then_else
@@ -3701,7 +3934,7 @@
(const_string "*")
(const_string "0")))
(set_attr "prefix_data16" "0")
- (set_attr "prefix" "maybe_vex")
+ (set_attr "prefix" "<mask_prefix2>")
(set_attr "mode" "TI")])
(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
@@ -4009,21 +4242,94 @@
(define_insn "float<si2dfmodelower><mode>2<mask_name>"
[(set (match_operand:VF2_512_256 0 "register_operand" "=v")
(float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX && <mask_mode512bit_condition>"
+ "<mask_mode512bit_condition>"
"vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn "ufloatv8siv8df<mask_name>"
- [(set (match_operand:V8DF 0 "register_operand" "=v")
- (unsigned_float:V8DF
- (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F"
+(define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
+ [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
+ (any_float:VF2_AVX512VL
+ (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512DQ"
+ "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+;; For <floatsuffix>float<sselongvecmodelower><mode> insn patterns
+(define_mode_attr qq2pssuff
+ [(V8SF "") (V4SF "{y}")])
+
+(define_mode_attr sselongvecmode
+ [(V8SF "V8DI") (V4SF "V4DI")])
+
+(define_mode_attr sselongvecmodelower
+ [(V8SF "v8di") (V4SF "v4di")])
+
+(define_mode_attr sseintvecmode3
+ [(V8SF "XI") (V4SF "OI")
+ (V8DF "OI") (V4DF "TI")])
+
+(define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
+ [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
+ (any_float:VF1_128_256VL
+ (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
+ "TARGET_AVX512DQ && <round_modev8sf_condition>"
+ "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<floatsuffix>floatv2div2sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_concat:V4SF
+ (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
+ (const_vector:V2SF [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "<floatsuffix>floatv2div2sf2_mask"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_concat:V4SF
+ (vec_merge:V2SF
+ (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V2SF [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
+ [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
+ (unsigned_float:VF2_512_256VL
+ (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "ufloatv2siv2df2<mask_name>"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (unsigned_float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_AVX512VL"
"vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
- (set_attr "mode" "V8DF")])
+ (set_attr "mode" "V2DF")])
(define_insn "avx512f_cvtdq2pd512_2"
[(set (match_operand:V8DF 0 "register_operand" "=v")
@@ -4034,33 +4340,33 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX"
+ "TARGET_AVX512F"
"vcvtdq2pd\t{%t1, %0|%0, %t1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
(define_insn "avx_cvtdq2pd256_2"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
+ [(set (match_operand:V4DF 0 "register_operand" "=v")
(float:V4DF
(vec_select:V4SI
- (match_operand:V8SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
"TARGET_AVX"
"vcvtdq2pd\t{%x1, %0|%0, %x1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4DF")])
-(define_insn "sse2_cvtdq2pd"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
+(define_insn "sse2_cvtdq2pd<mask_name>"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
(float:V2DF
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
- "TARGET_SSE2"
- "%vcvtdq2pd\t{%1, %0|%0, %q1}"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
+ "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
(set_attr "ssememalign" "64")
@@ -4077,14 +4383,14 @@
(set_attr "prefix" "evex")
(set_attr "mode" "OI")])
-(define_insn "avx_cvtpd2dq256"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
+(define_insn "avx_cvtpd2dq256<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
UNSPEC_FIX_NOTRUNC))]
- "TARGET_AVX"
- "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "OI")])
(define_expand "avx_cvtpd2dq256_2"
@@ -4109,25 +4415,16 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "OI")])
-(define_expand "sse2_cvtpd2dq"
- [(set (match_operand:V4SI 0 "register_operand")
- (vec_concat:V4SI
- (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
- UNSPEC_FIX_NOTRUNC)
- (match_dup 2)))]
- "TARGET_SSE2"
- "operands[2] = CONST0_RTX (V2SImode);")
-
-(define_insn "*sse2_cvtpd2dq"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
+(define_insn "sse2_cvtpd2dq<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_concat:V4SI
- (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
UNSPEC_FIX_NOTRUNC)
- (match_operand:V2SI 2 "const0_operand")))]
- "TARGET_SSE2"
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
if (TARGET_AVX)
- return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
+ return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
else
return "cvtpd2dq\t{%1, %0|%0, %1}";
}
@@ -4140,16 +4437,33 @@
(set_attr "athlon_decode" "vector")
(set_attr "bdver1_decode" "double")])
-(define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>"
- [(set (match_operand:V8SI 0 "register_operand" "=v")
- (unspec:V8SI
- [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
+;; For ufix_notrunc* insn patterns
+(define_mode_attr pd2udqsuff
+ [(V8DF "") (V4DF "{y}")])
+
+(define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
+ [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
+ (unspec:<si2dfmode>
+ [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512F"
- "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
- (set_attr "mode" "OI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "ufix_notruncv2dfv2si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (unspec:V2SI
+ [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC)
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
(define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
[(set (match_operand:V8SI 0 "register_operand" "=v")
@@ -4161,15 +4475,90 @@
(set_attr "prefix" "evex")
(set_attr "mode" "OI")])
-(define_insn "fix_truncv4dfv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX"
- "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
+(define_insn "ufix_truncv2dfv2si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "<fixsuffix>fix_truncv4dfv4si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (any_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
+ "(TARGET_AVX && !<ufix_bool>) || (TARGET_AVX512VL && TARGET_AVX512F)"
+ "vcvttpd2<fixsuffix>dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
+(define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
+ (any_fix:<sseintvecmode>
+ (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
+ "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
+ "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseintvecmode2>")])
+
+(define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
+ (unspec:<sseintvecmode>
+ [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && <round_mode512bit_condition>"
+ "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseintvecmode2>")])
+
+(define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
+ (unspec:<sseintvecmode>
+ [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && <round_mode512bit_condition>"
+ "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseintvecmode2>")])
+
+(define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
+ [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
+ (any_fix:<sselongvecmode>
+ (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
+ "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
+ "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseintvecmode3>")])
+
+(define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (any_fix:V2DI
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
+ (unsigned_fix:<sseintvecmode>
+ (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512VL"
+ "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseintvecmode2>")])
+
(define_expand "avx_cvttpd2dq256_2"
[(set (match_operand:V8SI 0 "register_operand")
(vec_concat:V8SI
@@ -4178,35 +4567,15 @@
"TARGET_AVX"
"operands[2] = CONST0_RTX (V4SImode);")
-(define_insn "*avx_cvttpd2dq256_2"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
- (vec_concat:V8SI
- (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
- (match_operand:V4SI 2 "const0_operand")))]
- "TARGET_AVX"
- "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
- (set_attr "btver2_decode" "vector")
- (set_attr "mode" "OI")])
-
-(define_expand "sse2_cvttpd2dq"
- [(set (match_operand:V4SI 0 "register_operand")
+(define_insn "sse2_cvttpd2dq<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_concat:V4SI
- (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
- (match_dup 2)))]
- "TARGET_SSE2"
- "operands[2] = CONST0_RTX (V2SImode);")
-
-(define_insn "*sse2_cvttpd2dq"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (vec_concat:V4SI
- (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
- (match_operand:V2SI 2 "const0_operand")))]
- "TARGET_SSE2"
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
if (TARGET_AVX)
- return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
+ return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
else
return "cvttpd2dq\t{%1, %0|%0, %1}";
}
@@ -4244,7 +4613,7 @@
(vec_merge:V2DF
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 2 "nonimmediate_operand" "x,m,<round_saeonly_constraint>")
+ (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
(parallel [(const_int 0) (const_int 1)])))
(match_operand:V2DF 1 "register_operand" "0,0,v")
(const_int 1)))]
@@ -4272,14 +4641,14 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V8SF")])
-(define_insn "avx_cvtpd2ps256"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "avx_cvtpd2ps256<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(float_truncate:V4SF
- (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX"
- "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
+ (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "btver2_decode" "vector")
(set_attr "mode" "V4SF")])
@@ -4292,16 +4661,28 @@
"TARGET_SSE2"
"operands[2] = CONST0_RTX (V2SFmode);")
-(define_insn "*sse2_cvtpd2ps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_expand "sse2_cvtpd2ps_mask"
+ [(set (match_operand:V4SF 0 "register_operand")
+ (vec_merge:V4SF
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand"))
+ (match_dup 4))
+ (match_operand:V4SF 2 "register_operand")
+ (match_operand:QI 3 "register_operand")))]
+ "TARGET_SSE2"
+ "operands[4] = CONST0_RTX (V2SFmode);")
+
+(define_insn "*sse2_cvtpd2ps<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_concat:V4SF
(float_truncate:V2SF
- (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
(match_operand:V2SF 2 "const0_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
if (TARGET_AVX)
- return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
+ return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
else
return "cvtpd2ps\t{%1, %0|%0, %1}";
}
@@ -4355,14 +4736,44 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
-(define_insn "sse2_cvtps2pd"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
+(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")]
+ UNSPEC_CVTINT2MASK))]
+ "((TARGET_AVX512BW
+ && (<ssescalarmode>mode == QImode
+ || <ssescalarmode>mode == HImode))
+ || (TARGET_AVX512DQ
+ && (<ssescalarmode>mode == SImode
+ || <ssescalarmode>mode == DImode)))"
+ "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_cvtmask2<ssemodesuffix><mode>"
+ [(set (match_operand:VI_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI_AVX512VL
+ [(match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")]
+ UNSPEC_CVTINT2MASK))]
+ "((TARGET_AVX512BW
+ && (<ssescalarmode>mode == QImode
+ || <ssescalarmode>mode == HImode))
+ || (TARGET_AVX512DQ
+ && (<ssescalarmode>mode == SImode
+ || <ssescalarmode>mode == DImode)))"
+ "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "sse2_cvtps2pd<mask_name>"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
- "TARGET_SSE2"
- "%vcvtps2pd\t{%1, %0|%0, %q1}"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
+ "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssecvt")
(set_attr "amdfam10_decode" "direct")
(set_attr "athlon_decode" "double")
@@ -5048,18 +5459,18 @@
(set_attr "mode" "V16SF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
-(define_insn "avx_unpckhps256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+(define_insn "avx_unpckhps256<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SF 1 "register_operand" "v")
+ (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 2) (const_int 10)
(const_int 3) (const_int 11)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
- "TARGET_AVX"
- "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
@@ -5098,18 +5509,18 @@
operands[4] = gen_reg_rtx (V8SFmode);
})
-(define_insn "vec_interleave_highv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv4sf<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,v")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0,x")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V4SF 1 "register_operand" "0,v")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
- "TARGET_SSE"
+ "TARGET_SSE && <mask_mode512bit_condition>"
"@
unpckhps\t{%2, %0|%0, %2}
- vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,vex")
@@ -5136,22 +5547,39 @@
(set_attr "mode" "V16SF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
-(define_insn "avx_unpcklps256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+(define_insn "avx_unpcklps256<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SF 1 "register_operand" "v")
+ (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 4) (const_int 12)
(const_int 5) (const_int 13)])))]
- "TARGET_AVX"
- "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
+(define_insn "unpcklps128_mask"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_merge:V4SF
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)]))
+ (match_operand:V4SF 3 "vector_move_operand" "0C")
+ (match_operand:QI 4 "register_operand" "Yk")))]
+ "TARGET_AVX512F && TARGET_AVX512VL"
+ "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V4SF")])
+
(define_expand "vec_interleave_lowv8sf"
[(set (match_dup 3)
(vec_select:V8SF
@@ -5205,34 +5633,34 @@
;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions
-(define_insn "avx_movshdup256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+(define_insn "avx_movshdup256<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SF 1 "nonimmediate_operand" "vm")
(match_dup 1))
(parallel [(const_int 1) (const_int 1)
(const_int 3) (const_int 3)
(const_int 5) (const_int 5)
(const_int 7) (const_int 7)])))]
- "TARGET_AVX"
- "vmovshdup\t{%1, %0|%0, %1}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "sse3_movshdup"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "sse3_movshdup<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
(match_dup 1))
(parallel [(const_int 1)
(const_int 1)
(const_int 7)
(const_int 7)])))]
- "TARGET_SSE3"
- "%vmovshdup\t{%1, %0|%0, %1}"
+ "TARGET_SSE3 && <mask_mode512bit_condition>"
+ "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix_rep" "1")
(set_attr "prefix" "maybe_vex")
@@ -5258,34 +5686,34 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
-(define_insn "avx_movsldup256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+(define_insn "avx_movsldup256<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SF 1 "nonimmediate_operand" "vm")
(match_dup 1))
(parallel [(const_int 0) (const_int 0)
(const_int 2) (const_int 2)
(const_int 4) (const_int 4)
(const_int 6) (const_int 6)])))]
- "TARGET_AVX"
- "vmovsldup\t{%1, %0|%0, %1}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "sse3_movsldup"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "sse3_movsldup<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
(match_dup 1))
(parallel [(const_int 0)
(const_int 0)
(const_int 6)
(const_int 6)])))]
- "TARGET_SSE3"
- "%vmovsldup\t{%1, %0|%0, %1}"
+ "TARGET_SSE3 && <mask_mode512bit_condition>"
+ "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix_rep" "1")
(set_attr "prefix" "maybe_vex")
@@ -5311,7 +5739,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
-(define_expand "avx_shufps256"
+(define_expand "avx_shufps256<mask_expand4_name>"
[(match_operand:V8SF 0 "register_operand")
(match_operand:V8SF 1 "register_operand")
(match_operand:V8SF 2 "nonimmediate_operand")
@@ -5319,25 +5747,28 @@
"TARGET_AVX"
{
int mask = INTVAL (operands[3]);
- emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT (((mask >> 4) & 3) + 8),
- GEN_INT (((mask >> 6) & 3) + 8),
- GEN_INT (((mask >> 0) & 3) + 4),
- GEN_INT (((mask >> 2) & 3) + 4),
- GEN_INT (((mask >> 4) & 3) + 12),
- GEN_INT (((mask >> 6) & 3) + 12)));
+ emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
+ operands[1],
+ operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 8),
+ GEN_INT (((mask >> 6) & 3) + 8),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 12),
+ GEN_INT (((mask >> 6) & 3) + 12)
+ <mask_expand4_args>));
DONE;
})
;; One bit in mask selects 2 elements.
-(define_insn "avx_shufps256_1"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+(define_insn "avx_shufps256_1<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SF 1 "register_operand" "v")
+ (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
(parallel [(match_operand 3 "const_0_to_3_operand" )
(match_operand 4 "const_0_to_3_operand" )
(match_operand 5 "const_8_to_11_operand" )
@@ -5347,6 +5778,7 @@
(match_operand 9 "const_12_to_15_operand")
(match_operand 10 "const_12_to_15_operand")])))]
"TARGET_AVX
+ && <mask_mode512bit_condition>
&& (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
&& INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
&& INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
@@ -5359,14 +5791,14 @@
mask |= (INTVAL (operands[6]) - 8) << 6;
operands[3] = GEN_INT (mask);
- return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
[(set_attr "type" "sseshuf")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "V8SF")])
-(define_expand "sse_shufps"
+(define_expand "sse_shufps<mask_expand4_name>"
[(match_operand:V4SF 0 "register_operand")
(match_operand:V4SF 1 "register_operand")
(match_operand:V4SF 2 "nonimmediate_operand")
@@ -5374,14 +5806,46 @@
"TARGET_SSE"
{
int mask = INTVAL (operands[3]);
- emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT (((mask >> 4) & 3) + 4),
- GEN_INT (((mask >> 6) & 3) + 4)));
+ emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
+ operands[1],
+ operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)
+ <mask_expand4_args>));
DONE;
})
+(define_insn "sse_shufps_v4sf_mask"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_merge:V4SF
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")
+ (match_operand 5 "const_4_to_7_operand")
+ (match_operand 6 "const_4_to_7_operand")]))
+ (match_operand:V4SF 7 "vector_move_operand" "0C")
+ (match_operand:QI 8 "register_operand" "Yk")))]
+ "TARGET_AVX512F && TARGET_AVX512VL"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 4) << 4;
+ mask |= (INTVAL (operands[6]) - 4) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
+}
+ [(set_attr "type" "sseshuf")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "sse_shufps_<mode>"
[(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
(vec_select:VI4F_128
@@ -5652,13 +6116,13 @@
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "vec_set<mode>_0"
[(set (match_operand:VI4F_128 0 "nonimmediate_operand"
- "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
+ "=v,v,x ,x,x,v,x ,x ,m ,m ,m")
(vec_merge:VI4F_128
(vec_duplicate:VI4F_128
(match_operand:<ssescalarmode> 2 "general_operand"
- " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
+ " v,m,*r,m,x,v,*rm,*rm,!x,!*re,!*fF"))
(match_operand:VI4F_128 1 "vector_move_operand"
- " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
+ " C,C,C ,C,0,v,0 ,x ,0 ,0 ,0")
(const_int 1)))]
"TARGET_SSE"
"@
@@ -5843,44 +6307,62 @@
operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
})
-(define_expand "avx512f_vextract<shuffletype>32x4_mask"
+(define_mode_attr extract_type
+ [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
+
+(define_mode_attr extract_suf
+ [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
+
+(define_mode_iterator AVX512_VEC
+ [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
+
+(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
[(match_operand:<ssequartermode> 0 "nonimmediate_operand")
- (match_operand:V16FI 1 "register_operand")
+ (match_operand:AVX512_VEC 1 "register_operand")
(match_operand:SI 2 "const_0_to_3_operand")
(match_operand:<ssequartermode> 3 "nonimmediate_operand")
(match_operand:QI 4 "register_operand")]
"TARGET_AVX512F"
{
+ int mask;
+ mask = INTVAL (operands[2]);
+
if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
operands[0] = force_reg (<ssequartermode>mode, operands[0]);
- switch (INTVAL (operands[2]))
- {
- case 0:
- emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
- operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
- GEN_INT (3), operands[3], operands[4]));
- break;
- case 1:
- emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
- operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
- GEN_INT (7), operands[3], operands[4]));
- break;
- case 2:
- emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
- operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
- GEN_INT (11), operands[3], operands[4]));
- break;
- case 3:
- emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
- operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
- GEN_INT (15), operands[3], operands[4]));
- break;
- default:
- gcc_unreachable ();
- }
+
+ if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
+ emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+ operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
+ GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
+ operands[4]));
+ else
+ emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
+ operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
+ operands[4]));
DONE;
})
+(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
+ [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
+ (vec_merge:<ssequartermode>
+ (vec_select:<ssequartermode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (parallel [(match_operand 2 "const_0_to_7_operand")
+ (match_operand 3 "const_0_to_7_operand")]))
+ (match_operand:<ssequartermode> 4 "memory_operand" "0")
+ (match_operand:QI 5 "register_operand" "k")))]
+ "TARGET_AVX512DQ && (INTVAL (operands[2]) = INTVAL (operands[3]) - 1)"
+{
+ operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
+ return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
[(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
(vec_merge:<ssequartermode>
@@ -5906,6 +6388,27 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
+ [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
+ (vec_select:<ssequartermode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (parallel [(match_operand 2 "const_0_to_7_operand")
+ (match_operand 3 "const_0_to_7_operand")])))]
+ "TARGET_AVX512DQ && (INTVAL (operands[2]) = INTVAL (operands[3]) - 1)"
+{
+ operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
+ return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
[(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
(vec_select:<ssequartermode>
@@ -5931,9 +6434,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_vextract<shuffletype>64x4_mask"
+(define_mode_attr extract_type_2
+ [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
+
+(define_mode_attr extract_suf_2
+ [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
+
+(define_mode_iterator AVX512_VEC_2
+ [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
+
+(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
[(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
- (match_operand:V8FI 1 "register_operand")
+ (match_operand:AVX512_VEC_2 1 "register_operand")
(match_operand:SI 2 "const_0_to_1_operand")
(match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
(match_operand:QI 4 "register_operand")]
@@ -5989,7 +6501,7 @@
(match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
(match_operand:QI 3 "register_operand" "Yk")))]
"TARGET_AVX512F"
-"vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
+ "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
@@ -6055,6 +6567,81 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "vec_extract_hi_<mode>_maskm"
+ [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+ (vec_merge:<ssehalfvecmode>
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "register_operand" "v")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))
+ (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
+ (match_operand:QI 3 "register_operand" "k")))]
+ "TARGET_AVX512DQ"
+ "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "register_operand" "v,v")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX512F && (!<mask_applied> || TARGET_AVX512DQ)"
+ "@
+ vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
+ vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "isa" "avx512dq,noavx512dq")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512vl_vextractf128<mode>"
+ [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+ (match_operand:VI48F_256 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_1_operand")
+ (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+{
+ rtx (*insn)(rtx, rtx, rtx, rtx);
+
+ if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
+ operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
+
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ insn = gen_vec_extract_lo_<mode>_mask;
+ break;
+ case 1:
+ insn = gen_vec_extract_hi_<mode>_mask;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "avx_vextractf128<mode>"
[(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
(match_operand:V_256 1 "register_operand")
@@ -6079,7 +6666,7 @@
DONE;
})
-(define_insn_and_split "vec_extract_lo_<mode>"
+(define_insn "vec_extract_lo_<mode><mask_name>"
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
(vec_select:<ssehalfvecmode>
(match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
@@ -6087,11 +6674,28 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
- "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
- "&& reload_completed"
- [(const_int 0)]
+ "TARGET_AVX512F
+ && <mask_mode512bit_condition>
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
+ if (<mask_applied>)
+ return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+ else
+ return "#";
+})
+
+(define_split
+ [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "nonimmediate_operand")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && reload_completed"
+ [(const_int 0)]
+ {
rtx op1 = operands[1];
if (REG_P (op1))
op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
@@ -6101,46 +6705,57 @@
DONE;
})
-(define_insn "vec_extract_hi_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
+(define_insn "vec_extract_lo_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
(vec_select:<ssehalfvecmode>
- (match_operand:V16FI 1 "nonimmediate_operand" "v,v")
- (parallel [(const_int 8) (const_int 9)
- (const_int 10) (const_int 11)
- (const_int 12) (const_int 13)
- (const_int 14) (const_int 15)])))]
- "TARGET_AVX512F"
- "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "length_immediate" "1")
- (set_attr "memory" "none,store")
- (set_attr "prefix" "evex")
- (set_attr "mode" "XI")])
+ (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_AVX
+ && (!<mask_applied> || (TARGET_AVX512VL && TARGET_AVX512DQ))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ if (<mask_applied>)
+ return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
+ else
+ return "#";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
-(define_insn_and_split "vec_extract_lo_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
+(define_split
+ [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
(vec_select:<ssehalfvecmode>
- (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
+ (match_operand:VI8F_256 1 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 1)])))]
- "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (match_dup 1))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && reload_completed"
+ [(const_int 0)]
{
- if (REG_P (operands[1]))
- operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
else
- operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
+ op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
})
-(define_insn "vec_extract_hi_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
+(define_insn "vec_extract_hi_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
(vec_select:<ssehalfvecmode>
- (match_operand:VI8F_256 1 "register_operand" "x,x")
+ (match_operand:VI8F_256 1 "register_operand" "v,v")
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_AVX"
- "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
+{
+ if (TARGET_AVX512DQ && TARGET_AVX512VL)
+ return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
+ else
+ return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
@@ -6148,36 +6763,106 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "vec_extract_lo_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
+(define_split
+ [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
(vec_select:<ssehalfvecmode>
- (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
+ (match_operand:VI4F_256 1 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
- "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (match_dup 1))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
+ [(const_int 0)]
{
- if (REG_P (operands[1]))
- operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
else
- operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
+ op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
})
-(define_insn "vec_extract_hi_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
+
+(define_insn "vec_extract_lo_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
(vec_select:<ssehalfvecmode>
- (match_operand:VI4F_256 1 "register_operand" "x,x")
+ (match_operand:VI4F_256 1 "nonimmediate_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "(!<mask_applied> && TARGET_AVX) || (TARGET_AVX512VL && TARGET_AVX512DQ)"
+{
+ if (<mask_applied>)
+ return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+ else
+ return "#";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_lo_<mode>_maskm"
+ [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+ (vec_merge:<ssehalfvecmode>
+ (vec_select:<ssehalfvecmode>
+ (match_operand:VI4F_256 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
+ (match_operand:QI 3 "register_operand" "k")))]
+ "TARGET_AVX512VL && TARGET_AVX512F"
+ "vextract<shuffletype>32x4\t{$0x0, %1, %0%{3%}|%0%{%3%}, %1, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode>_maskm"
+ [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+ (vec_merge:<ssehalfvecmode>
+ (vec_select:<ssehalfvecmode>
+ (match_operand:VI4F_256 1 "register_operand" "v")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
+ (match_operand:<ssehalfvecmode> 3 "register_operand" "k")))]
+ "TARGET_AVX512F && TARGET_AVX512VL"
+{
+ return "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:VI4F_256 1 "register_operand" "v")
(parallel [(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
- "TARGET_AVX"
- "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
+ "TARGET_AVX && (!<mask_applied> || (TARGET_AVX512VL && TARGET_AVX512F))"
+{
+ if (TARGET_AVX512VL && TARGET_AVX512F)
+ return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
+ else
+ return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "memory" "none,store")
- (set_attr "prefix" "vex")
+ (set_attr "memory" "none")
+ (set (attr "prefix")
+ (if_then_else
+ (match_test "TARGET_AVX512VL")
+ (const_string "evex")
+ (const_string "vex")))
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "vec_extract_lo_v32hi"
@@ -6366,8 +7051,8 @@
;; Modes handled by vec_extract patterns.
(define_mode_iterator VEC_EXTRACT_MODE
- [(V32QI "TARGET_AVX") V16QI
- (V16HI "TARGET_AVX") V8HI
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
@@ -6407,16 +7092,16 @@
(set_attr "mode" "V8DF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
-(define_insn "avx_unpckhpd256"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
+(define_insn "avx_unpckhpd256<mask_name>"
+ [(set (match_operand:V4DF 0 "register_operand" "=v")
(vec_select:V4DF
(vec_concat:V8DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DF 1 "register_operand" "v")
+ (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 1) (const_int 5)
(const_int 3) (const_int 7)])))]
- "TARGET_AVX"
- "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
@@ -6450,6 +7135,22 @@
})
+(define_insn "avx512vl_unpckhpd128_mask"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (vec_merge:V2DF
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 1) (const_int 3)]))
+ (match_operand:V2DF 3 "vector_move_operand" "0C")
+ (match_operand:QI 4 "register_operand" "Yk")))]
+ "TARGET_AVX512VL && TARGET_AVX512F"
+ "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V2DF")])
+
(define_expand "vec_interleave_highv2df"
[(set (match_operand:V2DF 0 "register_operand")
(vec_select:V2DF
@@ -6530,7 +7231,7 @@
(set_attr "mode" "V8DF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
-(define_expand "avx_movddup256"
+(define_expand "avx_movddup256<mask_name>"
[(set (match_operand:V4DF 0 "register_operand")
(vec_select:V4DF
(vec_concat:V8DF
@@ -6538,9 +7239,9 @@
(match_dup 1))
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))]
- "TARGET_AVX")
+ "TARGET_AVX && <mask_mode512bit_condition>")
-(define_expand "avx_unpcklpd256"
+(define_expand "avx_unpcklpd256<mask_name>"
[(set (match_operand:V4DF 0 "register_operand")
(vec_select:V4DF
(vec_concat:V8DF
@@ -6548,20 +7249,20 @@
(match_operand:V4DF 2 "nonimmediate_operand"))
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))]
- "TARGET_AVX")
+ "TARGET_AVX && <mask_mode512bit_condition>")
-(define_insn "*avx_unpcklpd256"
- [(set (match_operand:V4DF 0 "register_operand" "=x,x")
+(define_insn "*avx_unpcklpd256<mask_name>"
+ [(set (match_operand:V4DF 0 "register_operand" "=v,v")
(vec_select:V4DF
(vec_concat:V8DF
- (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
+ (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
+ (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))]
- "TARGET_AVX"
+ "TARGET_AVX && <mask_mode512bit_condition>"
"@
- vunpcklpd\t{%2, %1, %0|%0, %1, %2}
- vmovddup\t{%1, %0|%0, %1}"
+ vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
+ vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
@@ -6594,6 +7295,22 @@
operands[4] = gen_reg_rtx (V4DFmode);
})
+(define_insn "avx512vl_unpcklpd128_mask"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (vec_merge:V2DF
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 2)]))
+ (match_operand:V2DF 3 "vector_move_operand" "0C")
+ (match_operand:QI 4 "register_operand" "Yk")))]
+ "TARGET_AVX512VL && TARGET_AVX512F"
+ "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V2DF")])
+
(define_expand "vec_interleave_lowv2df"
[(set (match_operand:V2DF 0 "register_operand")
(vec_select:V2DF
@@ -6676,38 +7393,38 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "avx512f_scalef<mode><mask_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")]
+(define_insn "<avx512>_scalef<mode><mask_name><round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_SCALEF))]
"TARGET_AVX512F"
"vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_expand "avx512f_vternlog<mode>_maskz"
- [(match_operand:VI48_512 0 "register_operand")
- (match_operand:VI48_512 1 "register_operand")
- (match_operand:VI48_512 2 "register_operand")
- (match_operand:VI48_512 3 "nonimmediate_operand")
+(define_expand "<avx512>_vternlog<mode>_maskz"
+ [(match_operand:VI48_AVX512VL 0 "register_operand")
+ (match_operand:VI48_AVX512VL 1 "register_operand")
+ (match_operand:VI48_AVX512VL 2 "register_operand")
+ (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
(match_operand:SI 4 "const_0_to_255_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand")]
"TARGET_AVX512F"
{
- emit_insn (gen_avx512f_vternlog<mode>_maskz_1 (
+ emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
operands[0], operands[1], operands[2], operands[3],
operands[4], CONST0_RTX (<MODE>mode), operands[5]));
DONE;
})
-(define_insn "avx512f_vternlog<mode><sd_maskz_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (unspec:VI48_512
- [(match_operand:VI48_512 1 "register_operand" "0")
- (match_operand:VI48_512 2 "register_operand" "v")
- (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
+(define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI48_AVX512VL
+ [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
+ (match_operand:VI48_AVX512VL 2 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG))]
"TARGET_AVX512F"
@@ -6716,13 +7433,13 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vternlog<mode>_mask"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (vec_merge:VI48_512
- (unspec:VI48_512
- [(match_operand:VI48_512 1 "register_operand" "0")
- (match_operand:VI48_512 2 "register_operand" "v")
- (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
+(define_insn "<avx512>_vternlog<mode>_mask"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VI48_AVX512VL
+ (unspec:VI48_AVX512VL
+ [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
+ (match_operand:VI48_AVX512VL 2 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG)
(match_dup 1)
@@ -6733,9 +7450,9 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_getexp<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+(define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_GETEXP))]
"TARGET_AVX512F"
"vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
@@ -6756,12 +7473,12 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
- (match_operand:SI 3 "const_0_to_255_operand")]
- UNSPEC_ALIGN))]
+(define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_ALIGN))]
"TARGET_AVX512F"
"valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
[(set_attr "prefix" "evex")
@@ -6799,28 +7516,28 @@
})
-(define_expand "avx512f_fixupimm<mode>_maskz<round_saeonly_expand_name>"
- [(match_operand:VF_512 0 "register_operand")
- (match_operand:VF_512 1 "register_operand")
- (match_operand:VF_512 2 "register_operand")
+(define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
+ [(match_operand:VF_AVX512VL 0 "register_operand")
+ (match_operand:VF_AVX512VL 1 "register_operand")
+ (match_operand:VF_AVX512VL 2 "register_operand")
(match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
(match_operand:SI 4 "const_0_to_255_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand")]
"TARGET_AVX512F"
{
- emit_insn (gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
+ emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
operands[0], operands[1], operands[2], operands[3],
operands[4], CONST0_RTX (<MODE>mode), operands[5]
<round_saeonly_expand_operand6>));
DONE;
})
-(define_insn "avx512f_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "0")
- (match_operand:VF_512 2 "register_operand" "v")
- (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+(define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF_AVX512VL 2 "register_operand" "v")
+ (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_FIXUPIMM))]
"TARGET_AVX512F"
@@ -6828,13 +7545,13 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fixupimm<mode>_mask<round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "0")
- (match_operand:VF_512 2 "register_operand" "v")
- (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+(define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF_AVX512VL 2 "register_operand" "v")
+ (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_FIXUPIMM)
(match_dup 1)
@@ -6895,10 +7612,10 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "avx512f_rndscale<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+(define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_ROUND))]
"TARGET_AVX512F"
@@ -7031,7 +7748,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
-(define_expand "avx_shufpd256"
+(define_expand "avx_shufpd256<mask_expand4_name>"
[(match_operand:V4DF 0 "register_operand")
(match_operand:V4DF 1 "register_operand")
(match_operand:V4DF 2 "nonimmediate_operand")
@@ -7039,25 +7756,28 @@
"TARGET_AVX"
{
int mask = INTVAL (operands[3]);
- emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
- GEN_INT (mask & 1),
- GEN_INT (mask & 2 ? 5 : 4),
- GEN_INT (mask & 4 ? 3 : 2),
- GEN_INT (mask & 8 ? 7 : 6)));
+ emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
+ operands[1],
+ operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 5 : 4),
+ GEN_INT (mask & 4 ? 3 : 2),
+ GEN_INT (mask & 8 ? 7 : 6)
+ <mask_expand4_args>));
DONE;
})
-(define_insn "avx_shufpd256_1"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
+(define_insn "avx_shufpd256_1<mask_name>"
+ [(set (match_operand:V4DF 0 "register_operand" "=v")
(vec_select:V4DF
(vec_concat:V8DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DF 1 "register_operand" "v")
+ (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
(parallel [(match_operand 3 "const_0_to_1_operand")
(match_operand 4 "const_4_to_5_operand")
(match_operand 5 "const_2_to_3_operand")
(match_operand 6 "const_6_to_7_operand")])))]
- "TARGET_AVX"
+ "TARGET_AVX && <mask_mode512bit_condition>"
{
int mask;
mask = INTVAL (operands[3]);
@@ -7066,14 +7786,14 @@
mask |= (INTVAL (operands[6]) - 6) << 3;
operands[3] = GEN_INT (mask);
- return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
}
[(set_attr "type" "sseshuf")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
-(define_expand "sse2_shufpd"
+(define_expand "sse2_shufpd<mask_expand4_name>"
[(match_operand:V2DF 0 "register_operand")
(match_operand:V2DF 1 "register_operand")
(match_operand:V2DF 2 "nonimmediate_operand")
@@ -7081,25 +7801,51 @@
"TARGET_SSE2"
{
int mask = INTVAL (operands[3]);
- emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
- GEN_INT (mask & 1),
- GEN_INT (mask & 2 ? 3 : 2)));
+ emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
+ operands[2], GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 3 : 2)
+ <mask_expand4_args>));
DONE;
})
+(define_insn "sse2_shufpd_v2df_mask"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (vec_merge:V2DF
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand")
+ (match_operand 4 "const_2_to_3_operand")]))
+ (match_operand:V2DF 5 "vector_move_operand" "0C")
+ (match_operand:QI 6 "register_operand" "Yk")))]
+ "TARGET_AVX512VL"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 2) << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
+}
+ [(set_attr "type" "sseshuf")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V2DF")])
+
;; punpcklqdq and punpckhqdq are shorter than shufpd.
-(define_insn "avx2_interleave_highv4di"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_interleave_highv4di<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(vec_select:V4DI
(vec_concat:V8DI
- (match_operand:V4DI 1 "register_operand" "x")
- (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DI 1 "register_operand" "v")
+ (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 1)
(const_int 5)
(const_int 3)
(const_int 7)])))]
- "TARGET_AVX2"
- "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
@@ -7120,36 +7866,36 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "vec_interleave_highv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv2di<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,v")
(vec_select:V2DI
(vec_concat:V4DI
- (match_operand:V2DI 1 "register_operand" "0,x")
- (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V2DI 1 "register_operand" "0,v")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 1)
(const_int 3)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpckhqdq\t{%2, %0|%0, %2}
- vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+ vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_lowv4di"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_interleave_lowv4di<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(vec_select:V4DI
(vec_concat:V8DI
- (match_operand:V4DI 1 "register_operand" "x")
- (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DI 1 "register_operand" "v")
+ (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0)
(const_int 4)
(const_int 2)
(const_int 6)])))]
- "TARGET_AVX2"
- "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
@@ -7170,18 +7916,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "vec_interleave_lowv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_lowv2di<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,v")
(vec_select:V2DI
(vec_concat:V4DI
- (match_operand:V2DI 1 "register_operand" "0,x")
- (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V2DI 1 "register_operand" "0,v")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 0)
(const_int 2)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpcklqdq\t{%2, %0|%0, %2}
- vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+ vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
@@ -7477,24 +8223,24 @@
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
(set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
-(define_insn "vec_dupv2df"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+(define_insn "vec_dupv2df<mask_name>"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,v")
(vec_duplicate:V2DF
- (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
- "TARGET_SSE2"
+ (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
unpcklpd\t%0, %0
- %vmovddup\t{%1, %0|%0, %1}"
+ %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "isa" "noavx,sse3")
(set_attr "type" "sselog1")
(set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "V2DF,DF")])
(define_insn "*vec_concatv2df"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
+ [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x")
(vec_concat:V2DF
- (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
- (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
+ (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
+ (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))]
"TARGET_SSE"
"@
unpcklpd\t{%2, %0|%0, %2}
@@ -7521,48 +8267,513 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
+(define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
(define_mode_attr pmov_src_mode
[(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
(define_mode_attr pmov_src_lower
[(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
-(define_mode_attr pmov_suff
+(define_mode_attr pmov_suff_1
[(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
- [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
- (any_truncate:PMOV_DST_MODE
+ [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
+ (any_truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
"TARGET_AVX512F"
- "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
+ "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
- [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
- (vec_merge:PMOV_DST_MODE
- (any_truncate:PMOV_DST_MODE
+ [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
+ (vec_merge:PMOV_DST_MODE_1
+ (any_truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
- (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
+ (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
"TARGET_AVX512F"
- "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
- [(set (match_operand:PMOV_DST_MODE 0 "memory_operand")
- (vec_merge:PMOV_DST_MODE
- (any_truncate:PMOV_DST_MODE
+ [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
+ (vec_merge:PMOV_DST_MODE_1
+ (any_truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand"))
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
"TARGET_AVX512F")
+(define_mode_iterator PMOV_DST_MODE_2
+ [(V32QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V8HI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
+(define_mode_attr pmov_suff_2
+ [(V32QI "wb") (V16QI "wb") (V8HI "dw") (V4SI "qd")])
+
+(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
+ [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
+ (any_truncate:PMOV_DST_MODE_2
+ (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
+ ""
+ "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
+ [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
+ (vec_merge:PMOV_DST_MODE_2
+ (any_truncate:PMOV_DST_MODE_2
+ (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
+ (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+ ""
+ "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "<avx512>_<code><ssedoublemodelower><mode>2_store_mask"
+ [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
+ (vec_merge:PMOV_DST_MODE_2
+ (any_truncate:PMOV_DST_MODE_2
+ (match_operand:<ssedoublemode> 1 "register_operand"))
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+ "TARGET_AVX512VL")
+
+(define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
+(define_mode_attr pmov_dst_3
+ [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
+(define_mode_attr pmov_dst_zeroed_3
+ [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
+(define_mode_attr pmov_suff_3
+ [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
+
+(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (any_truncate:<pmov_dst_3>
+ (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
+ (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code>v2div2qi2_store"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (any_truncate:V2QI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V14QI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2qi2_mask"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (vec_merge:V2QI
+ (any_truncate:V2QI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2QI
+ (match_operand:V16QI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V14QI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2qi2_store_mask"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (vec_merge:V2QI
+ (any_truncate:V2QI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2QI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V14QI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code><mode>v4qi2_store"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (any_truncate:V4QI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V12QI
+ (match_dup 0)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v4qi2_mask"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (vec_merge:V4QI
+ (any_truncate:V4QI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V4QI
+ (match_operand:V16QI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V12QI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v4qi2_store_mask"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (vec_merge:V4QI
+ (any_truncate:V4QI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V4QI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V12QI
+ (match_dup 0)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code><mode>v8qi2_store"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (any_truncate:V8QI
+ (match_operand:VI2_128_4_256 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v8qi2_mask"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (vec_merge:V8QI
+ (any_truncate:V8QI
+ (match_operand:VI2_128_4_256 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V8QI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v8qi2_store_mask"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (vec_merge:V8QI
+ (any_truncate:V8QI
+ (match_operand:VI2_128_4_256 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
+(define_mode_attr pmov_dst_4
+ [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
+(define_mode_attr pmov_dst_zeroed_4
+ [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
+(define_mode_attr pmov_suff_4
+ [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
+
+(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (vec_concat:V8HI
+ (any_truncate:<pmov_dst_4>
+ (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
+ (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code><mode>v4hi2_store"
+ [(set (match_operand:V8HI 0 "memory_operand" "=m")
+ (vec_concat:V8HI
+ (any_truncate:V4HI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V4HI
+ (match_dup 0)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v4hi2_mask"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (vec_concat:V8HI
+ (vec_merge:V4HI
+ (any_truncate:V4HI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V4HI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v4hi2_store_mask"
+ [(set (match_operand:V8HI 0 "memory_operand" "=m")
+ (vec_concat:V8HI
+ (vec_merge:V4HI
+ (any_truncate:V4HI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V4HI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V4HI
+ (match_dup 0)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code>v2div2hi2_store"
+ [(set (match_operand:V8HI 0 "memory_operand" "=m")
+ (vec_concat:V8HI
+ (any_truncate:V2HI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V6HI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2hi2_mask"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (vec_concat:V8HI
+ (vec_merge:V2HI
+ (any_truncate:V2HI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2HI
+ (match_operand:V8HI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V6HI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2hi2_store_mask"
+ [(set (match_operand:V8HI 0 "memory_operand" "=m")
+ (vec_concat:V8HI
+ (vec_merge:V2HI
+ (any_truncate:V2HI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2HI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V6HI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code>v2div2si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (any_truncate:V2SI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (match_operand:V2SI 2 "const0_operand")))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code>v2div2si2_store"
+ [(set (match_operand:V4SI 0 "memory_operand" "=m")
+ (vec_concat:V4SI
+ (any_truncate:V2SI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2SI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2si2_mask"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (any_truncate:V2SI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2si2_store_mask"
+ [(set (match_operand:V4SI 0 "memory_operand" "=m")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (any_truncate:V2SI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2SI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V2SI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
(define_insn "*avx512f_<code>v8div16qi2"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(vec_concat:V16QI
@@ -7682,61 +8893,65 @@
(set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
+(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
[(set (match_operand:VI12_AVX2 0 "register_operand")
(sat_plusminus:VI12_AVX2
(match_operand:VI12_AVX2 1 "nonimmediate_operand")
(match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
+(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
[(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
(sat_plusminus:VI12_AVX2
(match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
(match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "TI")])
-(define_expand "mul<mode>3"
+(define_expand "mul<mode>3<mask_name>"
[(set (match_operand:VI1_AVX2 0 "register_operand")
(mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
(match_operand:VI1_AVX2 2 "register_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
DONE;
})
-(define_expand "mul<mode>3"
+(define_expand "mul<mode>3<mask_name>"
[(set (match_operand:VI2_AVX2 0 "register_operand")
(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
(match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
-(define_insn "*mul<mode>3"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
- (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
- (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+(define_insn "*mul<mode>3<mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
pmullw\t{%2, %0|%0, %2}
- vpmullw\t{%2, %1, %0|%0, %1, %2}"
+ vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "<s>mul<mode>3_highpart"
+(define_expand "<s>mul<mode>3_highpart<mask_name>"
[(set (match_operand:VI2_AVX2 0 "register_operand")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
@@ -7746,23 +8961,25 @@
(any_extend:<ssedoublemode>
(match_operand:VI2_AVX2 2 "nonimmediate_operand")))
(const_int 16))))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
-(define_insn "*<s>mul<mode>3_highpart"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+(define_insn "*<s>mul<mode>3_highpart<mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
+ (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
(const_int 16))))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
pmulh<u>w\t{%2, %0|%0, %2}
- vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
+ vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
@@ -7814,7 +9031,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_expand "vec_widen_umult_even_v8si"
+(define_expand "vec_widen_umult_even_v8si<mask_name>"
[(set (match_operand:V4DI 0 "register_operand")
(mult:V4DI
(zero_extend:V4DI
@@ -7827,29 +9044,31 @@
(match_operand:V8SI 2 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))))]
- "TARGET_AVX2"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
-(define_insn "*vec_widen_umult_even_v8si"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "*vec_widen_umult_even_v8si<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(mult:V4DI
(zero_extend:V4DI
(vec_select:V4SI
- (match_operand:V8SI 1 "nonimmediate_operand" "%x")
+ (match_operand:V8SI 1 "nonimmediate_operand" "%v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(zero_extend:V4DI
(vec_select:V4SI
- (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))))]
- "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
- "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2
+ && ix86_binary_operator_ok (MULT, V8SImode, operands)
+ && <mask_mode512bit_condition>"
+ "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_expand "vec_widen_umult_even_v4si"
+(define_expand "vec_widen_umult_even_v4si<mask_name>"
[(set (match_operand:V2DI 0 "register_operand")
(mult:V2DI
(zero_extend:V2DI
@@ -7860,28 +9079,30 @@
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
-(define_insn "*vec_widen_umult_even_v4si"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+(define_insn "*vec_widen_umult_even_v4si<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,v")
(mult:V2DI
(zero_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
(parallel [(const_int 0) (const_int 2)])))
(zero_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (MULT, V4SImode, operands)
+ && <mask_mode512bit_condition>"
"@
pmuludq\t{%2, %0|%0, %2}
- vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "TI")])
(define_expand "vec_widen_smult_even_v16si<mask_name>"
@@ -7929,7 +9150,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_expand "vec_widen_smult_even_v8si"
+(define_expand "vec_widen_smult_even_v8si<mask_name>"
[(set (match_operand:V4DI 0 "register_operand")
(mult:V4DI
(sign_extend:V4DI
@@ -7942,30 +9163,31 @@
(match_operand:V8SI 2 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))))]
- "TARGET_AVX2"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
-(define_insn "*vec_widen_smult_even_v8si"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "*vec_widen_smult_even_v8si<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(mult:V4DI
(sign_extend:V4DI
(vec_select:V4SI
- (match_operand:V8SI 1 "nonimmediate_operand" "x")
+ (match_operand:V8SI 1 "nonimmediate_operand" "v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4DI
(vec_select:V4SI
- (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))))]
- "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
- "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
+  "TARGET_AVX2 && <mask_mode512bit_condition>
+   && ix86_binary_operator_ok (MULT, V8SImode, operands)"
+ "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_expand "sse4_1_mulv2siv2di3"
+(define_expand "sse4_1_mulv2siv2di3<mask_name>"
[(set (match_operand:V2DI 0 "register_operand")
(mult:V2DI
(sign_extend:V2DI
@@ -7976,24 +9198,26 @@
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_SSE4_1"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
-(define_insn "*sse4_1_mulv2siv2di3"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+(define_insn "*sse4_1_mulv2siv2di3<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,v")
(mult:V2DI
(sign_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "TARGET_SSE4_1
+ && ix86_binary_operator_ok (MULT, V4SImode, operands)
+ && <mask_mode512bit_condition>"
"@
pmuldq\t{%2, %0|%0, %2}
- vpmuldq\t{%2, %1, %0|%0, %1, %2}"
+ vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
@@ -8001,6 +9225,18 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
+  [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
+          (unspec:<sseunpackmode>
+            [(match_operand:VI2_AVX2 1 "register_operand" "v")
+             (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
+             UNSPEC_PMADDWD512))]
+  "TARGET_AVX512BW && <mask_mode512bit_condition>"
+  "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
(define_expand "avx2_pmaddwd"
[(set (match_operand:V8SI 0 "register_operand")
(plus:V8SI
@@ -8132,6 +9368,17 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx512dq_mul<mode>3<mask_name>"
+ [(set (match_operand:VI8 0 "register_operand" "=v")
+ (mult:VI8
+ (match_operand:VI8 1 "register_operand" "v")
+ (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512DQ && <mask_mode512bit_condition>"
+ "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "mul<mode>3<mask_name>"
[(set (match_operand:VI4_AVX512F 0 "register_operand")
(mult:VI4_AVX512F
@@ -8230,6 +9477,9 @@
DONE;
})
+(define_mode_attr SDOT_PMADD_SUF
+ [(V32HI "512v32hi") (V16HI "") (V8HI "")])
+
(define_expand "sdot_prod<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI2_AVX2 1 "register_operand")
@@ -8238,7 +9488,7 @@
"TARGET_SSE2"
{
rtx t = gen_reg_rtx (<sseunpackmode>mode);
- emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
+ emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_PLUS (<sseunpackmode>mode,
operands[3], t)));
@@ -8290,15 +9540,15 @@
DONE;
})
-(define_insn "ashr<mode>3"
- [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
+(define_insn "ashr<mode>3<mask_name>"
+ [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,v")
(ashiftrt:VI24_AVX2
- (match_operand:VI24_AVX2 1 "register_operand" "0,x")
- (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
- "TARGET_SSE2"
+ (match_operand:VI24_AVX2 1 "register_operand" "0,v")
+ (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
psra<ssemodesuffix>\t{%2, %0|%0, %2}
- vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseishft")
(set (attr "length_immediate")
@@ -8306,9 +9556,37 @@
(const_string "1")
(const_string "0")))
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "ashrv4di3<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v,v")
+ (ashiftrt:V4DI
+ (match_operand:V4DI 1 "nonimmediate_operand" "v,vm")
+ (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+ "TARGET_AVX512VL"
+ "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "OI")])
+
+(define_insn "ashrv2di3<mask_name>_1"
+ [(set (match_operand:V2DI 0 "register_operand" "=v,v")
+ (ashiftrt:V2DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
+ (match_operand:DI 2 "nonmemory_operand" "v,N")))]
+ "TARGET_AVX512VL"
+ "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "TI")])
+
(define_insn "ashr<mode>3<mask_name>"
[(set (match_operand:VI48_512 0 "register_operand" "=v,v")
(ashiftrt:VI48_512
@@ -8323,15 +9601,16 @@
(const_string "0")))
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<shift_insn><mode>3"
- [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
+(define_insn "<shift_insn><mode>3<mask_name>"
+ [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,v")
(any_lshift:VI248_AVX2
- (match_operand:VI248_AVX2 1 "register_operand" "0,x")
- (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
- "TARGET_SSE2"
+ (match_operand:VI248_AVX2 1 "register_operand" "0,v")
+ (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
+ "TARGET_SSE2
+ && (!<mask_applied> || TARGET_AVX512VL || <MODE>mode == V32HImode)"
"@
p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseishft")
(set (attr "length_immediate")
@@ -8373,9 +9652,9 @@
})
(define_insn "<sse2_avx2>_ashl<mode>3"
- [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
(ashift:VIMAX_AVX2
- (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
+ (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
"TARGET_SSE2"
{
@@ -8412,9 +9691,9 @@
})
(define_insn "<sse2_avx2>_lshr<mode>3"
- [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
(lshiftrt:VIMAX_AVX2
- (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
+ (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
"TARGET_SSE2"
{
@@ -8438,20 +9717,20 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_<rotate>v<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (any_rotate:VI48_512
- (match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
+(define_insn "<avx512>_<rotate>v<mode><mask_name>"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (any_rotate:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
"vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_<rotate><mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (any_rotate:VI48_512
- (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
+(define_insn "<avx512>_<rotate><mode><mask_name>"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (any_rotate:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")))]
"TARGET_AVX512F"
"vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
@@ -8459,18 +9738,18 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "<code><mode>3<mask_name><round_name>"
- [(set (match_operand:VI124_256_48_512 0 "register_operand")
- (maxmin:VI124_256_48_512
- (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>")
- (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>")))]
+ [(set (match_operand:VI124_256_1248_512 0 "register_operand")
+ (maxmin:VI124_256_1248_512
+ (match_operand:VI124_256_1248_512 1 "nonimmediate_operand")
+ (match_operand:VI124_256_1248_512 2 "nonimmediate_operand")))]
"TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*avx2_<code><mode>3<mask_name><round_name>"
- [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
- (maxmin:VI124_256_48_512
- (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>" "%v")
- (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>" "<round_constraint>")))]
+ [(set (match_operand:VI124_256_1248_512 0 "register_operand" "=v")
+ (maxmin:VI124_256_1248_512
+ (match_operand:VI124_256_1248_512 1 "nonimmediate_operand" "%v")
+ (match_operand:VI124_256_1248_512 2 "nonimmediate_operand" "<round_constraint>")))]
"TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& <mask_mode512bit_condition> && <round_mode512bit_condition>"
"vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
@@ -8479,6 +9758,17 @@
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
+(define_insn "<mask_codefor><code><mode>3<mask_name>"
+  [(set (match_operand:VI128_256 0 "register_operand" "=v")
+	  (maxmin:VI128_256
+	    (match_operand:VI128_256 1 "register_operand" "v")
+	    (match_operand:VI128_256 2 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512VL"
+  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "<code><mode>3"
[(set (match_operand:VI8_AVX2 0 "register_operand")
(maxmin:VI8_AVX2
@@ -8490,27 +9780,32 @@
rtx xops[6];
bool ok;
- xops[0] = operands[0];
-
- if (<CODE> == SMAX || <CODE> == UMAX)
- {
- xops[1] = operands[1];
- xops[2] = operands[2];
- }
+ if (TARGET_AVX512VL)
+ ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
else
{
- xops[1] = operands[2];
- xops[2] = operands[1];
- }
+ xops[0] = operands[0];
- code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
+ if (<CODE> == SMAX || <CODE> == UMAX)
+ {
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ }
+ else
+ {
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ }
- xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
- xops[4] = operands[1];
- xops[5] = operands[2];
+ code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
- ok = ix86_expand_int_vcond (xops);
- gcc_assert (ok);
+ xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ }
DONE;
})
@@ -8553,15 +9848,17 @@
}
})
-(define_insn "*sse4_1_<code><mode>3"
- [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
+(define_insn "*sse4_1_<code><mode>3<mask_name>"
+ [(set (match_operand:VI14_128 0 "register_operand" "=x,v")
(smaxmin:VI14_128
- (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
- (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ (match_operand:VI14_128 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VI14_128 2 "nonimmediate_operand" "xm,vm")))]
+ "TARGET_SSE4_1
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "prefix_extra" "1,*")
@@ -8634,15 +9931,17 @@
}
})
-(define_insn "*sse4_1_<code><mode>3"
- [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
+(define_insn "*sse4_1_<code><mode>3<mask_name>"
+ [(set (match_operand:VI24_128 0 "register_operand" "=x,v")
(umaxmin:VI24_128
- (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
- (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ (match_operand:VI24_128 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VI24_128 2 "nonimmediate_operand" "xm,vm")))]
+ "TARGET_SSE4_1
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "prefix_extra" "1,*")
@@ -8691,20 +9990,20 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_expand "avx512f_eq<mode>3<mask_scalar_merge_name>"
+(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand")
- (match_operand:VI48_512 2 "nonimmediate_operand")]
+ [(match_operand:VI_AVX512VL 1 "register_operand")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand")]
UNSPEC_MASKED_EQ))]
"TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
-(define_insn "avx512f_eq<mode>3<mask_scalar_merge_name>_1"
+(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand" "%v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VI_AVX512VL 1 "register_operand" "%v")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_MASKED_EQ))]
"TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
"vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
@@ -8787,11 +10086,11 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "avx512f_gt<mode>3<mask_scalar_merge_name>"
+(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
"TARGET_AVX512F"
"vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "type" "ssecmp")
@@ -8952,7 +10251,8 @@
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
(V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
- (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V32HI "TARGET_AVX512BW")])
(define_expand "vec_perm<mode>"
[(match_operand:VEC_PERM_AVX2 0 "register_operand")
@@ -8973,7 +10273,8 @@
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
- (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V32HI "TARGET_AVX512BW")])
(define_expand "vec_perm_const<mode>"
[(match_operand:VEC_PERM_CONST 0 "register_operand")
@@ -9031,16 +10332,31 @@
{
case MODE_XI:
gcc_assert (TARGET_AVX512F);
-
- tmp = "pandn<ssemodesuffix>";
- break;
-
case MODE_OI:
- gcc_assert (TARGET_AVX2);
+ gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
case MODE_TI:
- gcc_assert (TARGET_SSE2);
-
- tmp = "pandn";
+ gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
+ switch (<MODE>mode)
+ {
+ case V16SImode:
+ case V8DImode:
+ if (TARGET_AVX512F)
+ {
+ tmp = "pandn<ssemodesuffix>";
+ break;
+ }
+ case V8SImode:
+ case V4DImode:
+ case V4SImode:
+ case V2DImode:
+ if (TARGET_AVX512VL)
+ {
+ tmp = "pandn<ssemodesuffix>";
+ break;
+ }
+ default:
+ tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
+ }
break;
case MODE_V16SF:
@@ -9125,16 +10441,31 @@
{
case MODE_XI:
gcc_assert (TARGET_AVX512F);
-
- tmp = "p<logic><ssemodesuffix>";
- break;
-
case MODE_OI:
- gcc_assert (TARGET_AVX2);
+ gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
case MODE_TI:
- gcc_assert (TARGET_SSE2);
-
- tmp = "p<logic>";
+ gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
+ switch (<MODE>mode)
+ {
+ case V16SImode:
+ case V8DImode:
+ if (TARGET_AVX512F)
+ {
+ tmp = "p<logic><ssemodesuffix>";
+ break;
+ }
+ case V8SImode:
+ case V4DImode:
+ case V4SImode:
+ case V2DImode:
+ if (TARGET_AVX512VL)
+ {
+ tmp = "p<logic><ssemodesuffix>";
+ break;
+ }
+ default:
+ tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>";
+ }
break;
case MODE_V16SF:
@@ -9192,22 +10523,22 @@
]
(const_string "<sseinsnmode>")))])
-(define_insn "avx512f_testm<mode>3<mask_scalar_merge_name>"
+(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_TESTM))]
"TARGET_AVX512F"
"vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_testnm<mode>3<mask_scalar_merge_name>"
+(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_TESTNM))]
"TARGET_AVX512F"
"vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
@@ -9232,63 +10563,107 @@
DONE;
})
-(define_insn "<sse2_avx2>_packsswb"
- [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+(define_insn "<sse2_avx2>_packsswb<mask_name>"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,v")
(vec_concat:VI1_AVX2
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
- "TARGET_SSE2"
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
packsswb\t{%2, %0|%0, %2}
- vpacksswb\t{%2, %1, %0|%0, %1, %2}"
+ vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<sse2_avx2>_packssdw"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+(define_insn "<sse2_avx2>_packssdw<mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
(vec_concat:VI2_AVX2
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
- "TARGET_SSE2"
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
packssdw\t{%2, %0|%0, %2}
- vpackssdw\t{%2, %1, %0|%0, %1, %2}"
+ vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<sse2_avx2>_packuswb"
- [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+(define_insn "<sse2_avx2>_packuswb<mask_name>"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,v")
(vec_concat:VI1_AVX2
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
- "TARGET_SSE2"
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
packuswb\t{%2, %0|%0, %2}
- vpackuswb\t{%2, %1, %0|%0, %1, %2}"
+ vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx2_interleave_highv32qi"
- [(set (match_operand:V32QI 0 "register_operand" "=x")
+(define_insn "avx512bw_interleave_highv64qi<mask_name>"
+ [(set (match_operand:V64QI 0 "register_operand" "=v")
+ (vec_select:V64QI
+ (vec_concat:V128QI
+ (match_operand:V64QI 1 "register_operand" "v")
+ (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 8) (const_int 72)
+ (const_int 9) (const_int 73)
+ (const_int 10) (const_int 74)
+ (const_int 11) (const_int 75)
+ (const_int 12) (const_int 76)
+ (const_int 13) (const_int 77)
+ (const_int 14) (const_int 78)
+ (const_int 15) (const_int 79)
+ (const_int 24) (const_int 88)
+ (const_int 25) (const_int 89)
+ (const_int 26) (const_int 90)
+ (const_int 27) (const_int 91)
+ (const_int 28) (const_int 92)
+ (const_int 29) (const_int 93)
+ (const_int 30) (const_int 94)
+ (const_int 31) (const_int 95)
+ (const_int 40) (const_int 104)
+ (const_int 41) (const_int 105)
+ (const_int 42) (const_int 106)
+ (const_int 43) (const_int 107)
+ (const_int 44) (const_int 108)
+ (const_int 45) (const_int 109)
+ (const_int 46) (const_int 110)
+ (const_int 47) (const_int 111)
+ (const_int 56) (const_int 120)
+ (const_int 57) (const_int 121)
+ (const_int 58) (const_int 122)
+ (const_int 59) (const_int 123)
+ (const_int 60) (const_int 124)
+ (const_int 61) (const_int 125)
+ (const_int 62) (const_int 126)
+ (const_int 63) (const_int 127)])))]
+ "TARGET_AVX512BW"
+ "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "avx2_interleave_highv32qi<mask_name>"
+ [(set (match_operand:V32QI 0 "register_operand" "=v")
(vec_select:V32QI
(vec_concat:V64QI
- (match_operand:V32QI 1 "register_operand" "x")
- (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V32QI 1 "register_operand" "v")
+ (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 8) (const_int 40)
(const_int 9) (const_int 41)
(const_int 10) (const_int 42)
@@ -9305,18 +10680,18 @@
(const_int 29) (const_int 61)
(const_int 30) (const_int 62)
(const_int 31) (const_int 63)])))]
- "TARGET_AVX2"
- "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "OI")])
-(define_insn "vec_interleave_highv16qi"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv16qi<mask_name>"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,v")
(vec_select:V16QI
(vec_concat:V32QI
- (match_operand:V16QI 1 "register_operand" "0,x")
- (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V16QI 1 "register_operand" "0,v")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 8) (const_int 24)
(const_int 9) (const_int 25)
(const_int 10) (const_int 26)
@@ -9325,22 +10700,66 @@
(const_int 13) (const_int 29)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpckhbw\t{%2, %0|%0, %2}
- vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
+ vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_lowv32qi"
- [(set (match_operand:V32QI 0 "register_operand" "=x")
+(define_insn "avx512bw_interleave_lowv64qi<mask_name>"
+ [(set (match_operand:V64QI 0 "register_operand" "=v")
+ (vec_select:V64QI
+ (vec_concat:V128QI
+ (match_operand:V64QI 1 "register_operand" "v")
+ (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 64)
+ (const_int 1) (const_int 65)
+ (const_int 2) (const_int 66)
+ (const_int 3) (const_int 67)
+ (const_int 4) (const_int 68)
+ (const_int 5) (const_int 69)
+ (const_int 6) (const_int 70)
+ (const_int 7) (const_int 71)
+ (const_int 16) (const_int 80)
+ (const_int 17) (const_int 81)
+ (const_int 18) (const_int 82)
+ (const_int 19) (const_int 83)
+ (const_int 20) (const_int 84)
+ (const_int 21) (const_int 85)
+ (const_int 22) (const_int 86)
+ (const_int 23) (const_int 87)
+ (const_int 32) (const_int 96)
+ (const_int 33) (const_int 97)
+ (const_int 34) (const_int 98)
+ (const_int 35) (const_int 99)
+ (const_int 36) (const_int 100)
+ (const_int 37) (const_int 101)
+ (const_int 38) (const_int 102)
+ (const_int 39) (const_int 103)
+ (const_int 48) (const_int 112)
+ (const_int 49) (const_int 113)
+ (const_int 50) (const_int 114)
+ (const_int 51) (const_int 115)
+ (const_int 52) (const_int 116)
+ (const_int 53) (const_int 117)
+ (const_int 54) (const_int 118)
+ (const_int 55) (const_int 119)])))]
+ "TARGET_AVX512BW"
+ "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "avx2_interleave_lowv32qi<mask_name>"
+ [(set (match_operand:V32QI 0 "register_operand" "=v")
(vec_select:V32QI
(vec_concat:V64QI
- (match_operand:V32QI 1 "register_operand" "x")
- (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V32QI 1 "register_operand" "v")
+ (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0) (const_int 32)
(const_int 1) (const_int 33)
(const_int 2) (const_int 34)
@@ -9357,18 +10776,18 @@
(const_int 21) (const_int 53)
(const_int 22) (const_int 54)
(const_int 23) (const_int 55)])))]
- "TARGET_AVX2"
- "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+   (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "vec_interleave_lowv16qi"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_lowv16qi<mask_name>"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,v")
(vec_select:V16QI
(vec_concat:V32QI
- (match_operand:V16QI 1 "register_operand" "0,x")
- (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V16QI 1 "register_operand" "0,v")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -9377,22 +10796,50 @@
(const_int 5) (const_int 21)
(const_int 6) (const_int 22)
(const_int 7) (const_int 23)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpcklbw\t{%2, %0|%0, %2}
- vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
+ vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_highv16hi"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+(define_insn "avx512bw_interleave_highv32hi<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (vec_select:V32HI
+ (vec_concat:V64HI
+ (match_operand:V32HI 1 "register_operand" "v")
+ (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 4) (const_int 36)
+ (const_int 5) (const_int 37)
+ (const_int 6) (const_int 38)
+ (const_int 7) (const_int 39)
+ (const_int 12) (const_int 44)
+ (const_int 13) (const_int 45)
+ (const_int 14) (const_int 46)
+ (const_int 15) (const_int 47)
+ (const_int 20) (const_int 52)
+ (const_int 21) (const_int 53)
+ (const_int 22) (const_int 54)
+ (const_int 23) (const_int 55)
+ (const_int 28) (const_int 60)
+ (const_int 29) (const_int 61)
+ (const_int 30) (const_int 62)
+ (const_int 31) (const_int 63)])))]
+ "TARGET_AVX512BW"
+ "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "avx2_interleave_highv16hi<mask_name>"
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(vec_select:V16HI
(vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "x")
- (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V16HI 1 "register_operand" "v")
+ (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 4) (const_int 20)
(const_int 5) (const_int 21)
(const_int 6) (const_int 22)
@@ -9401,38 +10848,66 @@
(const_int 13) (const_int 29)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_AVX2"
- "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "vec_interleave_highv8hi"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv8hi<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,v")
(vec_select:V8HI
(vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,x")
- (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V8HI 1 "register_operand" "0,v")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpckhwd\t{%2, %0|%0, %2}
- vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
+ vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_lowv16hi"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (vec_select:V32HI
+ (vec_concat:V64HI
+ (match_operand:V32HI 1 "register_operand" "v")
+ (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 32)
+ (const_int 1) (const_int 33)
+ (const_int 2) (const_int 34)
+ (const_int 3) (const_int 35)
+ (const_int 8) (const_int 40)
+ (const_int 9) (const_int 41)
+ (const_int 10) (const_int 42)
+ (const_int 11) (const_int 43)
+ (const_int 16) (const_int 48)
+ (const_int 17) (const_int 49)
+ (const_int 18) (const_int 50)
+ (const_int 19) (const_int 51)
+ (const_int 24) (const_int 56)
+ (const_int 25) (const_int 57)
+ (const_int 26) (const_int 58)
+ (const_int 27) (const_int 59)])))]
+ "TARGET_AVX512BW"
+ "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "avx2_interleave_lowv16hi<mask_name>"
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(vec_select:V16HI
(vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "x")
- (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V16HI 1 "register_operand" "v")
+ (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -9441,46 +10916,46 @@
(const_int 9) (const_int 25)
(const_int 10) (const_int 26)
(const_int 11) (const_int 27)])))]
- "TARGET_AVX2"
- "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "vec_interleave_lowv8hi"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_lowv8hi<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,v")
(vec_select:V8HI
(vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,x")
- (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V8HI 1 "register_operand" "0,v")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
(const_int 3) (const_int 11)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpcklwd\t{%2, %0|%0, %2}
- vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
+ vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_highv8si"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+(define_insn "avx2_interleave_highv8si<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(vec_select:V8SI
(vec_concat:V16SI
- (match_operand:V8SI 1 "register_operand" "x")
- (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SI 1 "register_operand" "v")
+ (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 2) (const_int 10)
(const_int 3) (const_int 11)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
- "TARGET_AVX2"
- "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
@@ -9504,38 +10979,38 @@
(set_attr "mode" "XI")])
-(define_insn "vec_interleave_highv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv4si<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,v")
(vec_select:V4SI
(vec_concat:V8SI
- (match_operand:V4SI 1 "register_operand" "0,x")
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V4SI 1 "register_operand" "0,v")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpckhdq\t{%2, %0|%0, %2}
- vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
+ vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_lowv8si"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+(define_insn "avx2_interleave_lowv8si<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(vec_select:V8SI
(vec_concat:V16SI
- (match_operand:V8SI 1 "register_operand" "x")
- (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SI 1 "register_operand" "v")
+ (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 4) (const_int 12)
(const_int 5) (const_int 13)])))]
- "TARGET_AVX2"
- "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
@@ -9558,18 +11033,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "vec_interleave_lowv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_lowv4si<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,v")
(vec_select:V4SI
(vec_concat:V8SI
- (match_operand:V4SI 1 "register_operand" "0,x")
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V4SI 1 "register_operand" "0,v")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpckldq\t{%2, %0|%0, %2}
- vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
+ vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
@@ -9678,80 +11153,64 @@
(set_attr "prefix" "orig,orig,vex,vex")
(set_attr "mode" "TI")])
-(define_expand "avx512f_vinsert<shuffletype>32x4_mask"
- [(match_operand:V16FI 0 "register_operand")
- (match_operand:V16FI 1 "register_operand")
+(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
+ [(match_operand:AVX512_VEC 0 "register_operand")
+ (match_operand:AVX512_VEC 1 "register_operand")
(match_operand:<ssequartermode> 2 "nonimmediate_operand")
(match_operand:SI 3 "const_0_to_3_operand")
- (match_operand:V16FI 4 "register_operand")
+ (match_operand:AVX512_VEC 4 "register_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand")]
"TARGET_AVX512F"
{
- switch (INTVAL (operands[3]))
- {
- case 0:
- emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
- operands[1], operands[2], GEN_INT (0xFFF), operands[4],
- operands[5]));
- break;
- case 1:
- emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
- operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
- operands[5]));
- break;
- case 2:
- emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
- operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
- operands[5]));
- break;
- case 3:
- emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
- operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
- operands[5]));
- break;
- default:
- gcc_unreachable ();
- }
+ int mask, selector;
+ mask = INTVAL (operands[3]);
+ selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
+ ? 0xFFFF ^ (0xF000 >> mask * 4)
+ : 0xFF ^ (0xC0 >> mask * 2);
+ emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
+ (operands[0], operands[1], operands[2], GEN_INT (selector),
+ operands[4], operands[5]));
DONE;
-
})
-(define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
- [(set (match_operand:V16FI 0 "register_operand" "=v")
- (vec_merge:V16FI
- (match_operand:V16FI 1 "register_operand" "v")
- (vec_duplicate:V16FI
+(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
+ [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
+ (vec_merge:AVX512_VEC
+ (match_operand:AVX512_VEC 1 "register_operand" "v")
+ (vec_duplicate:AVX512_VEC
(match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
(match_operand:SI 3 "const_int_operand" "n")))]
"TARGET_AVX512F"
{
int mask;
- if (INTVAL (operands[3]) == 0xFFF)
- mask = 0;
- else if ( INTVAL (operands[3]) == 0xF0FF)
- mask = 1;
- else if ( INTVAL (operands[3]) == 0xFF0F)
- mask = 2;
- else if ( INTVAL (operands[3]) == 0xFFF0)
- mask = 3;
+ int selector = INTVAL (operands[3]);
+
+ if (selector == 0xFFF || selector == 0x3F)
+ mask = 0;
+ else if (selector == 0xF0FF || selector == 0xCF)
+ mask = 1;
+ else if (selector == 0xFF0F || selector == 0xF3)
+ mask = 2;
+ else if (selector == 0xFFF0 || selector == 0xFC)
+ mask = 3;
else
gcc_unreachable ();
operands[3] = GEN_INT (mask);
- return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
+ return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
}
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_vinsert<shuffletype>64x4_mask"
- [(match_operand:V8FI 0 "register_operand")
- (match_operand:V8FI 1 "register_operand")
+(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
+ [(match_operand:AVX512_VEC_2 0 "register_operand")
+ (match_operand:AVX512_VEC_2 1 "register_operand")
(match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
(match_operand:SI 3 "const_0_to_1_operand")
- (match_operand:V8FI 4 "register_operand")
+ (match_operand:AVX512_VEC_2 4 "register_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand")]
"TARGET_AVX512F"
{
@@ -9768,6 +11227,40 @@
})
(define_insn "vec_set_lo_<mode><mask_name>"
+ [(set (match_operand:V16FI 0 "register_operand" "=v")
+ (vec_concat:V16FI
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "register_operand" "v")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512DQ"
+ "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_set_hi_<mode><mask_name>"
+ [(set (match_operand:V16FI 0 "register_operand" "=v")
+ (vec_concat:V16FI
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512DQ"
+ "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_set_lo_<mode><mask_name>"
[(set (match_operand:V8FI 0 "register_operand" "=v")
(vec_concat:V8FI
(match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
@@ -9797,6 +11290,51 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
+(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
+ [(match_operand:VI8F_256 0 "register_operand")
+ (match_operand:VI8F_256 1 "register_operand")
+ (match_operand:VI8F_256 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_3_operand")
+ (match_operand:VI8F_256 4 "register_operand")
+ (match_operand:QI 5 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
+ (operands[0], operands[1], operands[2],
+ GEN_INT (((mask >> 0) & 1) * 2 + 0),
+ GEN_INT (((mask >> 0) & 1) * 2 + 1),
+ GEN_INT (((mask >> 1) & 1) * 2 + 4),
+ GEN_INT (((mask >> 1) & 1) * 2 + 5),
+ operands[4], operands[5]));
+ DONE;
+})
+
+(define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
+ [(set (match_operand:VI8F_256 0 "register_operand" "=v")
+ (vec_select:VI8F_256
+ (vec_concat:<ssedoublemode>
+ (match_operand:VI8F_256 1 "register_operand" "v")
+ (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")
+ (match_operand 5 "const_4_to_7_operand")
+ (match_operand 6 "const_4_to_7_operand")])))]
+ "TARGET_AVX512VL
+ && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
+ && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
+{
+ int mask;
+ mask = INTVAL (operands[3]) / 2;
+ mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
+ operands[3] = GEN_INT (mask);
+ return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
[(match_operand:V8FI 0 "register_operand")
(match_operand:V8FI 1 "register_operand")
@@ -9855,6 +11393,64 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
+ [(match_operand:VI4F_256 0 "register_operand")
+ (match_operand:VI4F_256 1 "register_operand")
+ (match_operand:VI4F_256 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_3_operand")
+ (match_operand:VI4F_256 4 "register_operand")
+ (match_operand:QI 5 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
+ (operands[0], operands[1], operands[2],
+ GEN_INT (((mask >> 0) & 1) * 4 + 0),
+ GEN_INT (((mask >> 0) & 1) * 4 + 1),
+ GEN_INT (((mask >> 0) & 1) * 4 + 2),
+ GEN_INT (((mask >> 0) & 1) * 4 + 3),
+ GEN_INT (((mask >> 1) & 1) * 4 + 8),
+ GEN_INT (((mask >> 1) & 1) * 4 + 9),
+ GEN_INT (((mask >> 1) & 1) * 4 + 10),
+ GEN_INT (((mask >> 1) & 1) * 4 + 11),
+ operands[4], operands[5]));
+ DONE;
+})
+
+(define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
+ [(set (match_operand:VI4F_256 0 "register_operand" "=v")
+ (vec_select:VI4F_256
+ (vec_concat:<ssedoublemode>
+ (match_operand:VI4F_256 1 "register_operand" "v")
+ (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_7_operand")
+ (match_operand 4 "const_0_to_7_operand")
+ (match_operand 5 "const_0_to_7_operand")
+ (match_operand 6 "const_0_to_7_operand")
+ (match_operand 7 "const_8_to_15_operand")
+ (match_operand 8 "const_8_to_15_operand")
+ (match_operand 9 "const_8_to_15_operand")
+ (match_operand 10 "const_8_to_15_operand")])))]
+ "TARGET_AVX512VL
+ && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
+ && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
+ && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
+ && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
+ && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
+ && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
+{
+ int mask;
+ mask = INTVAL (operands[3]) / 4;
+ mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
[(match_operand:V16FI 0 "register_operand")
(match_operand:V16FI 1 "register_operand")
@@ -10015,6 +11611,28 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "XI")])
+(define_expand "avx512vl_pshufdv3_mask"
+ [(match_operand:V8SI 0 "register_operand")
+ (match_operand:V8SI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V8SI 3 "register_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4),
+ operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "avx2_pshufdv3"
[(match_operand:V8SI 0 "register_operand")
(match_operand:V8SI 1 "nonimmediate_operand")
@@ -10034,10 +11652,10 @@
DONE;
})
-(define_insn "avx2_pshufd_1"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+(define_insn "avx2_pshufd_1<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(vec_select:V8SI
- (match_operand:V8SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 1 "nonimmediate_operand" "vm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -10050,7 +11668,8 @@
&& INTVAL (operands[2]) + 4 == INTVAL (operands[6])
&& INTVAL (operands[3]) + 4 == INTVAL (operands[7])
&& INTVAL (operands[4]) + 4 == INTVAL (operands[8])
- && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
+ && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
+ && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -10059,13 +11678,31 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
+ return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
[(set_attr "type" "sselog1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "length_immediate" "1")
(set_attr "mode" "OI")])
+(define_expand "avx512vl_pshufd_mask"
+ [(match_operand:V4SI 0 "register_operand")
+ (match_operand:V4SI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V4SI 3 "register_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "sse2_pshufd"
[(match_operand:V4SI 0 "register_operand")
(match_operand:V4SI 1 "nonimmediate_operand")
@@ -10081,15 +11718,15 @@
DONE;
})
-(define_insn "sse2_pshufd_1"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
+(define_insn "sse2_pshufd_1<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_select:V4SI
- (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SI 1 "nonimmediate_operand" "vm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
(match_operand 5 "const_0_to_3_operand")])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -10098,14 +11735,48 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
+ return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
- (set_attr "prefix" "maybe_vex")
+ (set_attr "prefix" "<mask_prefix2>")
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
+(define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (unspec:V32HI
+ [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
+ (match_operand:SI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_PSHUFLW))]
+ "TARGET_AVX512BW"
+ "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_expand "avx512vl_pshuflwv3_mask"
+ [(match_operand:V16HI 0 "register_operand")
+ (match_operand:V16HI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V16HI 3 "register_operand")
+ (match_operand:HI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ GEN_INT (((mask >> 0) & 3) + 8),
+ GEN_INT (((mask >> 2) & 3) + 8),
+ GEN_INT (((mask >> 4) & 3) + 8),
+ GEN_INT (((mask >> 6) & 3) + 8),
+ operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "avx2_pshuflwv3"
[(match_operand:V16HI 0 "register_operand")
(match_operand:V16HI 1 "nonimmediate_operand")
@@ -10125,10 +11796,10 @@
DONE;
})
-(define_insn "avx2_pshuflw_1"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+(define_insn "avx2_pshuflw_1<mask_name>"
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(vec_select:V16HI
- (match_operand:V16HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16HI 1 "nonimmediate_operand" "vm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -10149,7 +11820,8 @@
&& INTVAL (operands[2]) + 8 == INTVAL (operands[6])
&& INTVAL (operands[3]) + 8 == INTVAL (operands[7])
&& INTVAL (operands[4]) + 8 == INTVAL (operands[8])
- && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
+ && INTVAL (operands[5]) + 8 == INTVAL (operands[9])
+ && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -10158,13 +11830,31 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+ return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "length_immediate" "1")
(set_attr "mode" "OI")])
+(define_expand "avx512vl_pshuflw_mask"
+ [(match_operand:V8HI 0 "register_operand")
+ (match_operand:V8HI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V8HI 3 "register_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "sse2_pshuflw"
[(match_operand:V8HI 0 "register_operand")
(match_operand:V8HI 1 "nonimmediate_operand")
@@ -10180,10 +11870,10 @@
DONE;
})
-(define_insn "sse2_pshuflw_1"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
+(define_insn "sse2_pshuflw_1<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
(vec_select:V8HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -10192,7 +11882,7 @@
(const_int 5)
(const_int 6)
(const_int 7)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -10201,7 +11891,7 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+ return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "0")
@@ -10229,10 +11919,44 @@
DONE;
})
-(define_insn "avx2_pshufhw_1"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+(define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (unspec:V32HI
+ [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
+ (match_operand:SI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_PSHUFHW))]
+ "TARGET_AVX512BW"
+ "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_expand "avx512vl_pshufhwv3_mask"
+ [(match_operand:V16HI 0 "register_operand")
+ (match_operand:V16HI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V16HI 3 "register_operand")
+ (match_operand:HI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4),
+ GEN_INT (((mask >> 0) & 3) + 12),
+ GEN_INT (((mask >> 2) & 3) + 12),
+ GEN_INT (((mask >> 4) & 3) + 12),
+ GEN_INT (((mask >> 6) & 3) + 12),
+ operands[3], operands[4]));
+ DONE;
+})
+
+(define_insn "avx2_pshufhw_1<mask_name>"
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(vec_select:V16HI
- (match_operand:V16HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16HI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -10253,7 +11977,8 @@
&& INTVAL (operands[2]) + 8 == INTVAL (operands[6])
&& INTVAL (operands[3]) + 8 == INTVAL (operands[7])
&& INTVAL (operands[4]) + 8 == INTVAL (operands[8])
- && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
+ && INTVAL (operands[5]) + 8 == INTVAL (operands[9])
+ && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= (INTVAL (operands[2]) - 4) << 0;
@@ -10262,13 +11987,31 @@
mask |= (INTVAL (operands[5]) - 4) << 6;
operands[2] = GEN_INT (mask);
- return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
+ return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "length_immediate" "1")
(set_attr "mode" "OI")])
+(define_expand "avx512vl_pshufhw_mask"
+ [(match_operand:V8HI 0 "register_operand")
+ (match_operand:V8HI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V8HI 3 "register_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4),
+ operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "sse2_pshufhw"
[(match_operand:V8HI 0 "register_operand")
(match_operand:V8HI 1 "nonimmediate_operand")
@@ -10284,10 +12027,10 @@
DONE;
})
-(define_insn "sse2_pshufhw_1"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
+(define_insn "sse2_pshufhw_1<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
(vec_select:V8HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -10296,7 +12039,7 @@
(match_operand 3 "const_4_to_7_operand")
(match_operand 4 "const_4_to_7_operand")
(match_operand 5 "const_4_to_7_operand")])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= (INTVAL (operands[2]) - 4) << 0;
@@ -10305,7 +12048,7 @@
mask |= (INTVAL (operands[5]) - 4) << 6;
operands[2] = GEN_INT (mask);
- return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
+ return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_rep" "1")
@@ -10717,7 +12460,7 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_expand "<sse2_avx2>_uavg<mode>3"
+(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
[(set (match_operand:VI12_AVX2 0 "register_operand")
(truncate:VI12_AVX2
(lshiftrt:<ssedoublemode>
@@ -10727,43 +12470,54 @@
(match_operand:VI12_AVX2 1 "nonimmediate_operand"))
(zero_extend:<ssedoublemode>
(match_operand:VI12_AVX2 2 "nonimmediate_operand")))
- (match_dup 3))
+ (match_dup <mask_expand_op3>))
(const_int 1))))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
+ rtx tmp = NULL_RTX;
+ if (<mask_applied>)
+ tmp = operands[3];
operands[3] = CONST1_RTX(<MODE>mode);
ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
+
+ if (<mask_applied>)
+ {
+ operands[5] = operands[3];
+ operands[3] = tmp;
+ }
})
-(define_insn "*<sse2_avx2>_uavg<mode>3"
- [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
+(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
+ [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
(truncate:VI12_AVX2
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(plus:<ssedoublemode>
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
+ (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
- (match_operand:VI12_AVX2 3 "const1_operand"))
+ (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
+ (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
(const_int 1))))]
- "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
pavg<ssemodesuffix>\t{%2, %0|%0, %2}
- vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
(define_insn "<sse2_avx2>_psadbw"
- [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
- (unspec:VI8_AVX2
- [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
- (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
+ [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ (unspec:VI8_AVX2_AVX512BW
+ [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
+ (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
UNSPEC_PSADBW))]
"TARGET_SSE2"
"@
@@ -10773,7 +12527,7 @@
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
@@ -11208,6 +12962,54 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+;;unspec version for intrinsics.
+(define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=v")
+ (unspec:VI2_AVX2
+ [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
+ (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
+ UNSPEC_PMADDUBSW512))]
+ "TARGET_AVX512BW && <mask_mode512bit_condition>"
+ "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "avx512bw_umulhrswv32hi3<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (truncate:V32HI
+ (lshiftrt:V32SI
+ (plus:V32SI
+ (lshiftrt:V32SI
+ (mult:V32SI
+ (sign_extend:V32SI
+ (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
+ (sign_extend:V32SI
+ (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
+ (const_int 14))
+ (const_vector:V32HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX512BW"
+ "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_insn "ssse3_pmaddubsw128"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
(ss_plus:V8HI
@@ -11285,6 +13087,29 @@
(define_mode_iterator PMULHRSW
[V4HI V8HI (V16HI "TARGET_AVX2")])
+(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
+ [(set (match_operand:PMULHRSW 0 "register_operand")
+ (vec_merge:PMULHRSW
+ (truncate:PMULHRSW
+ (lshiftrt:<ssedoublemode>
+ (plus:<ssedoublemode>
+ (lshiftrt:<ssedoublemode>
+ (mult:<ssedoublemode>
+ (sign_extend:<ssedoublemode>
+ (match_operand:PMULHRSW 1 "nonimmediate_operand"))
+ (sign_extend:<ssedoublemode>
+ (match_operand:PMULHRSW 2 "nonimmediate_operand")))
+ (const_int 14))
+ (match_dup 5))
+ (const_int 1)))
+ (match_operand:PMULHRSW 3 "register_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")))]
+ "TARGET_AVX512BW && TARGET_AVX512VL"
+{
+ operands[5] = CONST1_RTX(<MODE>mode);
+ ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
+})
+
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
[(set (match_operand:PMULHRSW 0 "register_operand")
(truncate:PMULHRSW
@@ -11305,29 +13130,31 @@
ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
-(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
+ (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
(const_int 14))
(match_operand:VI2_AVX2 3 "const1_operand"))
(const_int 1))))]
- "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "TARGET_SSSE3
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
pmulhrsw\t{%2, %0|%0, %2}
- vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+ vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*ssse3_pmulhrswv4hi3"
@@ -11351,21 +13178,21 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
-(define_insn "<ssse3_avx2>_pshufb<mode>3"
- [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,v")
(unspec:VI1_AVX2
- [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
- (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
+ [(match_operand:VI1_AVX2 1 "register_operand" "0,v")
+ (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,vm")]
UNSPEC_PSHUFB))]
- "TARGET_SSSE3"
+ "TARGET_SSSE3 && <mask_mode512bit_condition>"
"@
pshufb\t{%2, %0|%0, %2}
- vpshufb\t{%2, %1, %0|%0, %1, %2}"
+ vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "btver2_decode" "vector,vector")
(set_attr "mode" "<sseinsnmode>")])
@@ -11411,11 +13238,33 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
+(define_insn "<ssse3_avx2>_palignr<mode>_mask"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=v")
+ (vec_merge:VI1_AVX2
+ (unspec:VI1_AVX2
+ [(match_operand:VI1_AVX2 1 "register_operand" "v")
+ (match_operand:VI1_AVX2 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR)
+ (match_operand:VI1_AVX2 4 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
+ "TARGET_AVX512BW"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "atom_unit" "sishuf")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "<ssse3_avx2>_palignr<mode>"
- [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
+ [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
(unspec:SSESCALARMODE
- [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
- (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
+ [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
+ (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
(match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
UNSPEC_PALIGNR))]
"TARGET_SSSE3"
@@ -11460,10 +13309,10 @@
(set_attr "mode" "DI")])
(define_insn "<mask_codefor>abs<mode>2<mask_name>"
- [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
- (abs:VI124_AVX2_48_AVX512F
- (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
- "TARGET_SSSE3 && <mask_mode512bit_condition>"
+ [(set (match_operand:VI_AVX2 0 "register_operand" "=v")
+ (abs:VI_AVX2
+ (match_operand:VI_AVX2 1 "nonimmediate_operand" "vm")))]
+ "TARGET_SSSE3 && <mask_mode512bit_condition> && (<MODE>mode != V2DImode || TARGET_AVX)"
"%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
@@ -11471,11 +13320,12 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<sseinsnmode>")])
+;; TODO: not in the upstream patch -- double-check this iterator's "v" constraint.
(define_expand "abs<mode>2"
- [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
- (abs:VI124_AVX2_48_AVX512F
- (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
- "TARGET_SSE2"
+ [(set (match_operand:VI_AVX2 0 "register_operand")
+ (abs:VI_AVX2
+ (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
+ "TARGET_SSE2 && (<MODE>mode != V2DImode || TARGET_AVX)"
{
if (!TARGET_SSSE3)
{
@@ -11669,36 +13519,22 @@
(set_attr "btver2_decode" "vector,vector")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx2_packusdw"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
- (vec_concat:V16HI
- (us_truncate:V8HI
- (match_operand:V8SI 1 "register_operand" "x"))
- (us_truncate:V8HI
- (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
- "TARGET_AVX2"
- "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "OI")])
-
-(define_insn "sse4_1_packusdw"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
- (vec_concat:V8HI
- (us_truncate:V4HI
- (match_operand:V4SI 1 "register_operand" "0,x"))
- (us_truncate:V4HI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
- "TARGET_SSE4_1"
+(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ (vec_concat:VI2_AVX2
+ (us_truncate:<ssehalfvecmode>
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
+ (us_truncate:<ssehalfvecmode>
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
"@
packusdw\t{%2, %0|%0, %2}
- vpackusdw\t{%2, %1, %0|%0, %1, %2}"
+ vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "prefix" "orig,maybe_evex")
+ (set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_pblendvb"
[(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
@@ -11791,28 +13627,39 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx2_<code>v16qiv16hi2"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(any_extend:V16HI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2"
- "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v8qiv8hi2"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
+(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (any_extend:V32HI
+ (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512BW"
+ "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
(any_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
@@ -11829,31 +13676,31 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_<code>v8qiv8si2"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v8qiv8si2<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(any_extend:V8SI
(vec_select:V8QI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX2"
- "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v4qiv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
+(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
(any_extend:V4SI
(vec_select:V4QI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "32")
(set_attr "prefix_extra" "1")
@@ -11870,26 +13717,26 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_<code>v8hiv8si2"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v8hiv8si2<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(any_extend:V8SI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2"
- "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v4hiv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
+(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
(any_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
@@ -11911,28 +13758,28 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_<code>v4qiv4di2"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v4qiv4di2<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(any_extend:V4DI
(vec_select:V4QI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
- "TARGET_AVX2"
- "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v2qiv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
+(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
(any_extend:V2DI
(vec_select:V2QI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "16")
(set_attr "prefix_extra" "1")
@@ -11949,28 +13796,28 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_<code>v4hiv4di2"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v4hiv4di2<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(any_extend:V4DI
(vec_select:V4HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
- "TARGET_AVX2"
- "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v2hiv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
+(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
(any_extend:V2DI
(vec_select:V2HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "32")
(set_attr "prefix_extra" "1")
@@ -11987,24 +13834,25 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_<code>v4siv4di2"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v4siv4di2<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(any_extend:V4DI
- (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2"
- "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
+ (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_evex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v2siv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
+(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
(any_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
@@ -13556,17 +15404,20 @@
(match_operand:VI48_256 2 "nonimmediate_operand")))]
"TARGET_AVX2")
-(define_expand "vashr<mode>3"
+(define_expand "vashr<mode>3<mask_name>"
[(set (match_operand:VI128_128 0 "register_operand")
(ashiftrt:VI128_128
(match_operand:VI128_128 1 "register_operand")
(match_operand:VI128_128 2 "nonimmediate_operand")))]
- "TARGET_XOP"
+ "TARGET_XOP || ((TARGET_AVX512BW || <MODE>mode == V2DImode) && TARGET_AVX512VL)"
{
- rtx neg = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_neg<mode>2 (neg, operands[2]));
- emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
- DONE;
+ if (!((TARGET_AVX512BW || <MODE>mode == V2DImode) && TARGET_AVX512VL))
+ {
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
+ DONE;
+ }
})
(define_expand "vashrv4si3"
@@ -13713,34 +15564,37 @@
DONE;
})
-(define_expand "ashrv2di3"
+(define_expand "ashrv2di3<mask_name>"
[(set (match_operand:V2DI 0 "register_operand")
(ashiftrt:V2DI
(match_operand:V2DI 1 "register_operand")
(match_operand:DI 2 "nonmemory_operand")))]
- "TARGET_XOP"
+ "TARGET_XOP || TARGET_AVX512VL"
{
rtx reg = gen_reg_rtx (V2DImode);
rtx par;
bool negate = false;
int i;
- if (CONST_INT_P (operands[2]))
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- else
- negate = true;
+ if (!TARGET_AVX512VL)
+ {
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ else
+ negate = true;
- par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
- for (i = 0; i < 2; i++)
- XVECEXP (par, 0, i) = operands[2];
+ par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
+ for (i = 0; i < 2; i++)
+ XVECEXP (par, 0, i) = operands[2];
- emit_insn (gen_vec_initv2di (reg, par));
+ emit_insn (gen_vec_initv2di (reg, par));
- if (negate)
- emit_insn (gen_negv2di2 (reg, reg));
+ if (negate)
+ emit_insn (gen_negv2di2 (reg, reg));
- emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
- DONE;
+ emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
+ DONE;
+ }
})
;; XOP FRCZ support
@@ -14039,10 +15893,10 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
- [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
- (unspec:VI48F_256_512
- [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
+(define_insn "<avx2_avx512bw>_permvar<mode><mask_name>"
+ [(set (match_operand:VI48F_256_512_2I 0 "register_operand" "=v")
+ (unspec:VI48F_256_512_2I
+ [(match_operand:VI48F_256_512_2I 1 "nonimmediate_operand" "vm")
(match_operand:<sseintvecmode> 2 "register_operand" "v")]
UNSPEC_VPERMVAR))]
"TARGET_AVX2 && <mask_mode512bit_condition>"
@@ -14051,14 +15905,14 @@
(set_attr "prefix" "<mask_prefix2>")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "<avx2_avx512f>_perm<mode>"
+(define_expand "<avx2_avx512bw>_perm<mode>"
[(match_operand:VI8F_256_512 0 "register_operand")
(match_operand:VI8F_256_512 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")]
"TARGET_AVX2"
{
int mask = INTVAL (operands[2]);
- emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
+ emit_insn (gen_<avx2_avx512bw>_perm<mode>_1 (operands[0], operands[1],
GEN_INT ((mask >> 0) & 3),
GEN_INT ((mask >> 2) & 3),
GEN_INT ((mask >> 4) & 3),
@@ -14066,16 +15920,16 @@
DONE;
})
-(define_expand "avx512f_perm<mode>_mask"
- [(match_operand:V8FI 0 "register_operand")
- (match_operand:V8FI 1 "nonimmediate_operand")
+(define_expand "<avx512>_perm<mode>_mask"
+ [(match_operand:VI8F_256_512 0 "register_operand")
+ (match_operand:VI8F_256_512 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")
- (match_operand:V8FI 3 "vector_move_operand")
+ (match_operand:VI8F_256_512 3 "vector_move_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512F"
{
int mask = INTVAL (operands[2]);
- emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
+ emit_insn (gen_<avx2_avx512bw>_perm<mode>_1_mask (operands[0], operands[1],
GEN_INT ((mask >> 0) & 3),
GEN_INT ((mask >> 2) & 3),
GEN_INT ((mask >> 4) & 3),
@@ -14084,7 +15938,7 @@
DONE;
})
-(define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
+(define_insn "<avx2_avx512bw>_perm<mode>_1<mask_name>"
[(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
(vec_select:VI8F_256_512
(match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
@@ -14136,9 +15990,9 @@
[V8SI V8SF V4DI V4DF])
(define_insn "vec_dup<mode>"
- [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
+ [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,v,x")
(vec_duplicate:AVX_VEC_DUP_MODE
- (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,v,?x")))]
"TARGET_AVX"
"@
vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
@@ -14146,13 +16000,13 @@
#"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "isa" "*,avx2,noavx2")
(set_attr "mode" "V8SF")])
-(define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (vec_duplicate:VI48F_512
+(define_insn "<avx512>_vec_dup<mode><mask_name>"
+ [(set (match_operand:V_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:V_AVX512VL
(vec_select:<ssescalarmode>
(match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0)]))))]
@@ -14186,19 +16040,24 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (vec_duplicate:VI48_512
+(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
+ [(set (match_operand:VI_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:VI_AVX512VL
(match_operand:<ssescalarmode> 1 "register_operand" "r")))]
- "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
- "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ "TARGET_AVX512F && (<ssescalarmode>mode != DImode || TARGET_64BIT)"
+{
+ /* To generate correct assembler. */
+ if (GET_MODE (operands[1]) == QImode || GET_MODE (operands[1]) == HImode)
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
+ return "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (vec_duplicate:VI48F_512
+(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
+ [(set (match_operand:V_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:V_AVX512VL
(match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
"v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -14245,9 +16104,84 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512cd_maskb_vec_dupv8di"
- [(set (match_operand:V8DI 0 "register_operand" "=v")
- (vec_duplicate:V8DI
+;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
+(define_mode_iterator VI4F_BRCST32x2
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL")])
+
+(define_mode_attr 64x2_mode
+ [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
+
+(define_mode_attr 32x2mode
+ [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
+ (V8SF "V2SF") (V4SI "V2SI")])
+
+(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
+ [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
+ (vec_duplicate:VI4F_BRCST32x2
+ (vec_select:<32x2mode>
+ (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_AVX512DQ"
+ "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
+ [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
+ (vec_duplicate:VI4F_256
+ (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
+ "TARGET_AVX512VL"
+ "@
+ vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
+ vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
+ [(set (match_operand:V16FI 0 "register_operand" "=v,v")
+ (vec_duplicate:V16FI
+ (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
+ "TARGET_AVX512DQ"
+ "@
+ vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
+ vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
+ [(set (match_operand:VI8F_256_512 0 "register_operand" "=v,v")
+ (vec_duplicate:VI8F_256_512
+ (match_operand:<64x2_mode> 1 "nonimmediate_operand" "v,m")))]
+ "TARGET_AVX512DQ"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (GET_MODE_SIZE (<MODE>mode) == 64)
+ return "vshuf<shuffletype>64x2\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}";
+ else
+ return "vshuf<shuffletype>64x2\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}";
+ case 1:
+ return "vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512cd_maskb_vec_dup<mode>"
+ [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:VI8_AVX512VL
(zero_extend:DI
(match_operand:QI 1 "register_operand" "Yk"))))]
"TARGET_AVX512CD"
@@ -14256,9 +16190,9 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx512cd_maskw_vec_dupv16si"
- [(set (match_operand:V16SI 0 "register_operand" "=v")
- (vec_duplicate:V16SI
+(define_insn "avx512cd_maskw_vec_dup<mode>"
+ [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:VI4_AVX512VL
(zero_extend:SI
(match_operand:HI 1 "register_operand" "Yk"))))]
"TARGET_AVX512CD"
@@ -14418,26 +16352,26 @@
(set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_vpermi2var<mode>3_maskz"
- [(match_operand:VI48F_512 0 "register_operand" "=v")
- (match_operand:VI48F_512 1 "register_operand" "v")
+(define_expand "<avx512>_vpermi2var<mode>3_maskz"
+ [(match_operand:VI248F 0 "register_operand" "=v")
+ (match_operand:VI248F 1 "register_operand" "v")
(match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
"TARGET_AVX512F"
{
- emit_insn (gen_avx512f_vpermi2var<mode>3_maskz_1 (
+ emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
operands[0], operands[1], operands[2], operands[3],
CONST0_RTX (<MODE>mode), operands[4]));
DONE;
})
-(define_insn "avx512f_vpermi2var<mode>3<sd_maskz_name>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand" "v")
+(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
+ [(set (match_operand:VI248F 0 "register_operand" "=v")
+ (unspec:VI248F
+ [(match_operand:VI248F 1 "register_operand" "v")
(match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMI2))]
"TARGET_AVX512F"
"vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
@@ -14445,13 +16379,13 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vpermi2var<mode>3_mask"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (vec_merge:VI48F_512
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand" "v")
+(define_insn "<avx512>_vpermi2var<mode>3_mask"
+ [(set (match_operand:VI248F 0 "register_operand" "=v")
+ (vec_merge:VI248F
+ (unspec:VI248F
+ [(match_operand:VI248F 1 "register_operand" "v")
(match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMI2_MASK)
(match_dup 0)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -14461,26 +16395,26 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_vpermt2var<mode>3_maskz"
- [(match_operand:VI48F_512 0 "register_operand" "=v")
+(define_expand "<avx512>_vpermt2var<mode>3_maskz"
+ [(match_operand:VI248F 0 "register_operand" "=v")
(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
+ (match_operand:VI248F 2 "register_operand" "0")
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
"TARGET_AVX512F"
{
- emit_insn (gen_avx512f_vpermt2var<mode>3_maskz_1 (
+ emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
operands[0], operands[1], operands[2], operands[3],
CONST0_RTX (<MODE>mode), operands[4]));
DONE;
})
-(define_insn "avx512f_vpermt2var<mode>3<sd_maskz_name>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (unspec:VI48F_512
+(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
+ [(set (match_operand:VI248F 0 "register_operand" "=v")
+ (unspec:VI248F
[(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ (match_operand:VI248F 2 "register_operand" "0")
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMT2))]
"TARGET_AVX512F"
"vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
@@ -14488,13 +16422,13 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vpermt2var<mode>3_mask"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (vec_merge:VI48F_512
- (unspec:VI48F_512
+(define_insn "<avx512>_vpermt2var<mode>3_mask"
+ [(set (match_operand:VI248F 0 "register_operand" "=v")
+ (vec_merge:VI248F
+ (unspec:VI248F
[(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ (match_operand:VI248F 2 "register_operand" "0")
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMT2)
(match_dup 2)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -14610,6 +16544,34 @@
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")])
+(define_expand "avx512vl_vinsert<mode>"
+ [(match_operand:VI48F_256 0 "register_operand")
+ (match_operand:VI48F_256 1 "register_operand")
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_1_operand")
+ (match_operand:VI48F_256 4 "register_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
+
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ insn = gen_vec_set_lo_<mode>_mask;
+ break;
+ case 1:
+ insn = gen_vec_set_hi_<mode>_mask;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
+ operands[5]));
+ DONE;
+})
+
(define_expand "avx_vinsertf128<mode>"
[(match_operand:V_256 0 "register_operand")
(match_operand:V_256 1 "register_operand")
@@ -14635,92 +16597,82 @@
DONE;
})
-(define_insn "avx2_vec_set_lo_v4di"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
- (vec_concat:V4DI
- (match_operand:V2DI 2 "nonimmediate_operand" "xm")
- (vec_select:V2DI
- (match_operand:V4DI 1 "register_operand" "x")
- (parallel [(const_int 2) (const_int 3)]))))]
- "TARGET_AVX2"
- "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "OI")])
-
-(define_insn "avx2_vec_set_hi_v4di"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
- (vec_concat:V4DI
- (vec_select:V2DI
- (match_operand:V4DI 1 "register_operand" "x")
- (parallel [(const_int 0) (const_int 1)]))
- (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2"
- "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "OI")])
-
-(define_insn "vec_set_lo_<mode>"
- [(set (match_operand:VI8F_256 0 "register_operand" "=x")
+(define_insn "vec_set_lo_<mode><mask_name>"
+ [(set (match_operand:VI8F_256 0 "register_operand" "=v")
(vec_concat:VI8F_256
- (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
(vec_select:<ssehalfvecmode>
- (match_operand:VI8F_256 1 "register_operand" "x")
+ (match_operand:VI8F_256 1 "register_operand" "v")
(parallel [(const_int 2) (const_int 3)]))))]
"TARGET_AVX"
- "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+{
+ if (TARGET_AVX512VL)
+ return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
+ else
+ return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vec_set_hi_<mode>"
- [(set (match_operand:VI8F_256 0 "register_operand" "=x")
+(define_insn "vec_set_hi_<mode><mask_name>"
+ [(set (match_operand:VI8F_256 0 "register_operand" "=v")
(vec_concat:VI8F_256
(vec_select:<ssehalfvecmode>
- (match_operand:VI8F_256 1 "register_operand" "x")
+ (match_operand:VI8F_256 1 "register_operand" "v")
(parallel [(const_int 0) (const_int 1)]))
- (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX"
- "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+{
+ if (TARGET_AVX512VL)
+ return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
+ else
+ return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vec_set_lo_<mode>"
- [(set (match_operand:VI4F_256 0 "register_operand" "=x")
+(define_insn "vec_set_lo_<mode><mask_name>"
+ [(set (match_operand:VI4F_256 0 "register_operand" "=v")
(vec_concat:VI4F_256
- (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
(vec_select:<ssehalfvecmode>
- (match_operand:VI4F_256 1 "register_operand" "x")
+ (match_operand:VI4F_256 1 "register_operand" "v")
(parallel [(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
"TARGET_AVX"
- "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+{
+ if (TARGET_AVX512VL)
+ return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
+ else
+ return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vec_set_hi_<mode>"
- [(set (match_operand:VI4F_256 0 "register_operand" "=x")
+(define_insn "vec_set_hi_<mode><mask_name>"
+ [(set (match_operand:VI4F_256 0 "register_operand" "=v")
(vec_concat:VI4F_256
(vec_select:<ssehalfvecmode>
- (match_operand:VI4F_256 1 "register_operand" "x")
+ (match_operand:VI4F_256 1 "register_operand" "v")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))
- (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX"
- "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+{
+ if (TARGET_AVX512VL)
+ return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
+ else
+ return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
@@ -14883,7 +16835,7 @@
})
(define_expand "vec_init<mode>"
- [(match_operand:VI48F_512 0 "register_operand")
+ [(match_operand:VI48F_I12B_512 0 "register_operand")
(match_operand 1)]
"TARGET_AVX512F"
{
@@ -14891,71 +16843,22 @@
DONE;
})
-(define_expand "avx2_extracti128"
- [(match_operand:V2DI 0 "nonimmediate_operand")
- (match_operand:V4DI 1 "register_operand")
- (match_operand:SI 2 "const_0_to_1_operand")]
- "TARGET_AVX2"
-{
- rtx (*insn)(rtx, rtx);
-
- switch (INTVAL (operands[2]))
- {
- case 0:
- insn = gen_vec_extract_lo_v4di;
- break;
- case 1:
- insn = gen_vec_extract_hi_v4di;
- break;
- default:
- gcc_unreachable ();
- }
-
- emit_insn (insn (operands[0], operands[1]));
- DONE;
-})
-
-(define_expand "avx2_inserti128"
- [(match_operand:V4DI 0 "register_operand")
- (match_operand:V4DI 1 "register_operand")
- (match_operand:V2DI 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_1_operand")]
- "TARGET_AVX2"
-{
- rtx (*insn)(rtx, rtx, rtx);
-
- switch (INTVAL (operands[3]))
- {
- case 0:
- insn = gen_avx2_vec_set_lo_v4di;
- break;
- case 1:
- insn = gen_avx2_vec_set_hi_v4di;
- break;
- default:
- gcc_unreachable ();
- }
-
- emit_insn (insn (operands[0], operands[1], operands[2]));
- DONE;
-})
-
-(define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
- [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
- (ashiftrt:VI48_AVX512F
- (match_operand:VI48_AVX512F 1 "register_operand" "v")
- (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
+(define_insn "<avx2_avx512bw>_ashrv<mode><mask_name>"
+ [(set (match_operand:VI248_AVX512 0 "register_operand" "=v")
+ (ashiftrt:VI248_AVX512
+ (match_operand:VI248_AVX512 1 "register_operand" "v")
+ (match_operand:VI248_AVX512 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX2 && <mask_mode512bit_condition>"
"vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseishft")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
- [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
- (any_lshift:VI48_AVX2_48_AVX512F
- (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
- (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
+(define_insn "<avx2_avx512bw>_<shift_insn>v<mode><mask_name>"
+ [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v")
+ (any_lshift:VI248_AVX512BW
+ (match_operand:VI248_AVX512BW 1 "register_operand" "v")
+ (match_operand:VI248_AVX512BW 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX2 && <mask_mode512bit_condition>"
"vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseishft")
@@ -15006,35 +16909,35 @@
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vcvtph2ps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "vcvtph2ps<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_select:V4SF
- (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
+ (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
UNSPEC_VCVTPH2PS)
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
- "TARGET_F16C"
- "vcvtph2ps\t{%1, %0|%0, %1}"
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4SF")])
-(define_insn "*vcvtph2ps_load"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "*vcvtph2ps_load<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
UNSPEC_VCVTPH2PS))]
- "TARGET_F16C"
- "vcvtph2ps\t{%1, %0|%0, %1}"
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "vcvtph2ps256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
+(define_insn "vcvtph2ps256<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
+ (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
UNSPEC_VCVTPH2PS))]
- "TARGET_F16C"
- "vcvtph2ps\t{%1, %0|%0, %1}"
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "vex")
(set_attr "btver2_decode" "double")
@@ -15051,6 +16954,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
+(define_expand "vcvtps2ph_mask"
+ [(set (match_operand:V8HI 0 "register_operand")
+ (vec_merge:V8HI
+ (vec_concat:V8HI
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")]
+ UNSPEC_VCVTPS2PH)
+ (match_dup 5))
+ (match_operand:V8HI 3 "vector_move_operand")
+ (match_operand:QI 4 "register_operand")))]
+ "TARGET_AVX512VL"
+ "operands[5] = CONST0_RTX (V4HImode);")
+
(define_expand "vcvtps2ph"
[(set (match_operand:V8HI 0 "register_operand")
(vec_concat:V8HI
@@ -15061,39 +16977,39 @@
"TARGET_F16C"
"operands[3] = CONST0_RTX (V4HImode);")
-(define_insn "*vcvtps2ph"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
+(define_insn "*vcvtps2ph<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
(vec_concat:V8HI
- (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand" "N")]
UNSPEC_VCVTPS2PH)
(match_operand:V4HI 3 "const0_operand")))]
- "TARGET_F16C"
- "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ "(TARGET_F16C && !<mask_applied>) || TARGET_AVX512VL"
+ "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4SF")])
-(define_insn "*vcvtps2ph_store"
+(define_insn "*vcvtps2ph_store<mask_name>"
[(set (match_operand:V4HI 0 "memory_operand" "=m")
(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
(match_operand:SI 2 "const_0_to_255_operand" "N")]
UNSPEC_VCVTPS2PH))]
- "TARGET_F16C"
- "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4SF")])
-(define_insn "vcvtps2ph256"
+(define_insn "vcvtps2ph256<mask_name>"
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
(match_operand:SI 2 "const_0_to_255_operand" "N")]
UNSPEC_VCVTPS2PH))]
- "TARGET_F16C"
- "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "btver2_decode" "vector")
(set_attr "mode" "V8SF")])
@@ -15303,10 +17219,10 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_gathersi<mode>"
- [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand")
+(define_expand "<avx512>_gathersi<mode>"
+ [(parallel [(set (match_operand:VI48F 0 "register_operand")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "register_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")
(mem:<ssescalarmode>
(match_par_dup 6
@@ -15323,9 +17239,9 @@
})
(define_insn "*avx512f_gathersi<mode>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand" "0")
+ [(set (match_operand:VI48F 0 "register_operand" "=&v")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "register_operand" "0")
(match_operand:<avx512fmaskmode> 7 "register_operand" "2")
(match_operator:<ssescalarmode> 6 "vsib_mem_operator"
[(unspec:P
@@ -15342,8 +17258,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*avx512f_gathersi<mode>_2"
- [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
- (unspec:VI48F_512
+ [(set (match_operand:VI48F 0 "register_operand" "=&v")
+ (unspec:VI48F
[(pc)
(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
(match_operator:<ssescalarmode> 5 "vsib_mem_operator"
@@ -15361,9 +17277,9 @@
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_gatherdi<mode>"
- [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
- (unspec:VI48F_512
+(define_expand "<avx512>_gatherdi<mode>"
+ [(parallel [(set (match_operand:VI48F 0 "register_operand")
+ (unspec:VI48F
[(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
(match_operand:QI 4 "register_operand")
(mem:<ssescalarmode>
@@ -15381,8 +17297,8 @@
})
(define_insn "*avx512f_gatherdi<mode>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
- (unspec:VI48F_512
+ [(set (match_operand:VI48F 0 "register_operand" "=&v")
+ (unspec:VI48F
[(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
(match_operand:QI 7 "register_operand" "2")
(match_operator:<ssescalarmode> 6 "vsib_mem_operator"
@@ -15400,8 +17316,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*avx512f_gatherdi<mode>_2"
- [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
- (unspec:VI48F_512
+ [(set (match_operand:VI48F 0 "register_operand" "=&v")
+ (unspec:VI48F
[(pc)
(match_operand:QI 6 "register_operand" "1")
(match_operator:<ssescalarmode> 5 "vsib_mem_operator"
@@ -15415,22 +17331,27 @@
"TARGET_AVX512F"
{
if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
+ {
+ if (GET_MODE_SIZE (<MODE>mode) != 64)
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%t0%{%1%}, %g5}";
+ else
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
+ }
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_scattersi<mode>"
- [(parallel [(set (mem:VI48F_512
+(define_expand "<avx512>_scattersi<mode>"
+ [(parallel [(set (mem:VI48F
(match_par_dup 5
[(match_operand 0 "vsib_address_operand")
(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
(match_operand:SI 4 "const1248_operand")]))
- (unspec:VI48F_512
+ (unspec:VI48F
[(match_operand:<avx512fmaskmode> 1 "register_operand")
- (match_operand:VI48F_512 3 "register_operand")]
+ (match_operand:VI48F 3 "register_operand")]
UNSPEC_SCATTER))
(clobber (match_scratch:<avx512fmaskmode> 6))])]
"TARGET_AVX512F"
@@ -15441,15 +17362,15 @@
})
(define_insn "*avx512f_scattersi<mode>"
- [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
+ [(set (match_operator:VI48F 5 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 0 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
(match_operand:SI 4 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
- (unspec:VI48F_512
+ (unspec:VI48F
[(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
- (match_operand:VI48F_512 3 "register_operand" "v")]
+ (match_operand:VI48F 3 "register_operand" "v")]
UNSPEC_SCATTER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
"TARGET_AVX512F"
@@ -15458,13 +17379,13 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_scatterdi<mode>"
- [(parallel [(set (mem:VI48F_512
+(define_expand "<avx512>_scatterdi<mode>"
+ [(parallel [(set (mem:VI48F
(match_par_dup 5
[(match_operand 0 "vsib_address_operand")
- (match_operand:V8DI 2 "register_operand")
+ (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
(match_operand:SI 4 "const1248_operand")]))
- (unspec:VI48F_512
+ (unspec:VI48F
[(match_operand:QI 1 "register_operand")
(match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
UNSPEC_SCATTER))
@@ -15477,13 +17398,13 @@
})
(define_insn "*avx512f_scatterdi<mode>"
- [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
+ [(set (match_operator:VI48F 5 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 0 "vsib_address_operand" "Tv")
- (match_operand:V8DI 2 "register_operand" "v")
+ (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
(match_operand:SI 4 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
- (unspec:VI48F_512
+ (unspec:VI48F
[(match_operand:QI 6 "register_operand" "1")
(match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
UNSPEC_SCATTER))
@@ -15494,11 +17415,11 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_compress<mode>_mask"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "vector_move_operand" "0C")
+(define_insn "<avx512>_compress<mode>_mask"
+ [(set (match_operand:VI48F 0 "register_operand" "=v")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "register_operand" "v")
+ (match_operand:VI48F 2 "vector_move_operand" "0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
UNSPEC_COMPRESS))]
"TARGET_AVX512F"
@@ -15507,10 +17428,10 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_compressstore<mode>_mask"
- [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand" "x")
+(define_insn "<avx512>_compressstore<mode>_mask"
+ [(set (match_operand:VI48F 0 "memory_operand" "=m")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "register_operand" "x")
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
UNSPEC_COMPRESS_STORE))]
@@ -15521,21 +17442,21 @@
(set_attr "memory" "store")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_expand<mode>_maskz"
- [(set (match_operand:VI48F_512 0 "register_operand")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "nonimmediate_operand")
- (match_operand:VI48F_512 2 "vector_move_operand")
+(define_expand "<avx512>_expand<mode>_maskz"
+ [(set (match_operand:VI48F 0 "register_operand")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "nonimmediate_operand")
+ (match_operand:VI48F 2 "vector_move_operand")
(match_operand:<avx512fmaskmode> 3 "register_operand")]
UNSPEC_EXPAND))]
"TARGET_AVX512F"
"operands[2] = CONST0_RTX (<MODE>mode);")
-(define_insn "avx512f_expand<mode>_mask"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
- (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
+(define_insn "<avx512>_expand<mode>_mask"
+ [(set (match_operand:VI48F 0 "register_operand" "=v,v")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
+ (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
UNSPEC_EXPAND))]
"TARGET_AVX512F"
@@ -15545,10 +17466,67 @@
(set_attr "memory" "none,load")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_getmant<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_15_operand")]
+ UNSPEC_RANGE))]
+ "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
+ "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512dq_ranges<mode><round_saeonly_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_15_operand")]
+ UNSPEC_RANGE_SCALAR)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512DQ"
+ "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:QI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_FPCLASS))]
+ "TARGET_AVX512DQ"
+ "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
+ [(set_attr "type" "sse")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512dq_vmfpclass<mode>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+ (and:<avx512fmaskmode>
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:QI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_FPCLASS_SCALAR)
+ (const_int 1)))]
+ "TARGET_AVX512DQ"
+ "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+ [(set_attr "type" "sse")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 2 "const_0_to_15_operand")]
UNSPEC_GETMANT))]
"TARGET_AVX512F"
@@ -15556,7 +17534,7 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_getmant<mode><round_saeonly_name>"
+(define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -15571,10 +17549,25 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
+(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
+ [(set (match_operand:VI2_AVX512F 0 "register_operand" "=v")
+ (unspec:VI2_AVX512F
+ [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
+ (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_DBPSADBW))]
+ "TARGET_AVX512BW && <mask_mode512bit_condition>"
+ "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
+ [(set_attr "isa" "avx")
+ (set_attr "type" "sselog1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "clz<mode>2<mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (clz:VI48_512
- (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (clz:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512CD"
"vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
@@ -15582,9 +17575,9 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<mask_codefor>conflict<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (unspec:VI48_512
- [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI48_AVX512VL
+ [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
UNSPEC_CONFLICT))]
"TARGET_AVX512CD"
"vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 1654cbae6ab..8826533cf33 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -20,8 +20,8 @@
;; Some iterators for extending subst as much as possible
;; All vectors (Use it for destination)
(define_mode_iterator SUBST_V
- [V16QI
- V16HI V8HI
+ [V64QI V32QI V16QI
+ V32HI V16HI V8HI
V16SI V8SI V4SI
V8DI V4DI V2DI
V16SF V8SF V4SF
@@ -31,8 +31,8 @@
[QI HI SI DI])
(define_mode_iterator SUBST_A
- [V16QI
- V16HI V8HI
+ [V64QI V32QI V16QI
+ V32HI V16HI V8HI
V16SI V8SI V4SI
V8DI V4DI V2DI
V16SF V8SF V4SF
@@ -47,16 +47,20 @@
(define_subst_attr "mask_operand3_1" "mask" "" "%%{%%4%%}%%N3") ;; for sprintf
(define_subst_attr "mask_operand4" "mask" "" "%{%5%}%N4")
(define_subst_attr "mask_operand6" "mask" "" "%{%7%}%N6")
+(define_subst_attr "mask_operand7" "mask" "" "%{%8%}%N7")
+(define_subst_attr "mask_operand10" "mask" "" "%{%11%}%N10")
(define_subst_attr "mask_operand11" "mask" "" "%{%12%}%N11")
(define_subst_attr "mask_operand18" "mask" "" "%{%19%}%N18")
(define_subst_attr "mask_operand19" "mask" "" "%{%20%}%N19")
(define_subst_attr "mask_codefor" "mask" "*" "")
-(define_subst_attr "mask_mode512bit_condition" "mask" "1" "(<MODE_SIZE> == 64)")
+(define_subst_attr "mask_operand_arg34" "mask" "" ", operands[3], operands[4]")
+(define_subst_attr "mask_mode512bit_condition" "mask" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64 || TARGET_AVX512VL)")
(define_subst_attr "store_mask_constraint" "mask" "vm" "v")
(define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" "register_operand")
(define_subst_attr "mask_prefix" "mask" "vex" "evex")
(define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex")
(define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex")
+(define_subst_attr "mask_expand_op3" "mask" "3" "5")
(define_subst "mask"
[(set (match_operand:SUBST_V 0)
@@ -85,7 +89,7 @@
(define_subst_attr "sd_mask_op4" "sd" "" "%{%5%}%N4")
(define_subst_attr "sd_mask_op5" "sd" "" "%{%6%}%N5")
(define_subst_attr "sd_mask_codefor" "sd" "*" "")
-(define_subst_attr "sd_mask_mode512bit_condition" "sd" "1" "(<MODE_SIZE> == 64)")
+(define_subst_attr "sd_mask_mode512bit_condition" "sd" "1" "(<MODE_SIZE> == 64 || TARGET_AVX512VL)")
(define_subst "sd"
[(set (match_operand:SUBST_V 0)
@@ -101,6 +105,7 @@
(define_subst_attr "round_name" "round" "" "_round")
(define_subst_attr "round_mask_operand2" "mask" "%R2" "%R4")
(define_subst_attr "round_mask_operand3" "mask" "%R3" "%R5")
+(define_subst_attr "round_mask_operand4" "mask" "%R4" "%R6")
(define_subst_attr "round_sd_mask_operand4" "sd" "%R4" "%R6")
(define_subst_attr "round_op2" "round" "" "%R2")
(define_subst_attr "round_op3" "round" "" "%R3")
@@ -109,15 +114,19 @@
(define_subst_attr "round_op6" "round" "" "%R6")
(define_subst_attr "round_mask_op2" "round" "" "<round_mask_operand2>")
(define_subst_attr "round_mask_op3" "round" "" "<round_mask_operand3>")
-(define_subst_attr "round_mask_scalar_op3" "round" "" "<round_mask_scalar_operand3>")
+(define_subst_attr "round_mask_op4" "round" "" "<round_mask_operand4>")
(define_subst_attr "round_sd_mask_op4" "round" "" "<round_sd_mask_operand4>")
(define_subst_attr "round_constraint" "round" "vm" "v")
(define_subst_attr "round_constraint2" "round" "m" "v")
(define_subst_attr "round_constraint3" "round" "rm" "r")
(define_subst_attr "round_nimm_predicate" "round" "nonimmediate_operand" "register_operand")
(define_subst_attr "round_prefix" "round" "vex" "evex")
-(define_subst_attr "round_mode512bit_condition" "round" "1" "(<MODE>mode == V16SFmode || <MODE>mode == V8DFmode)")
-(define_subst_attr "round_modev4sf_condition" "round" "1" "(<MODE>mode == V4SFmode)")
+(define_subst_attr "round_mode512bit_condition" "round" "1" "(GET_MODE (operands[0]) == V16SFmode
+ || GET_MODE (operands[0]) == V8DFmode
+ || GET_MODE (operands[0]) == V8DImode
+ || GET_MODE (operands[0]) == V16SImode)")
+(define_subst_attr "round_modev8sf_condition" "round" "1" "(GET_MODE (operands[0]) == V8SFmode)")
+(define_subst_attr "round_modev4sf_condition" "round" "1" "(GET_MODE (operands[0]) == V4SFmode)")
(define_subst_attr "round_codefor" "round" "*" "")
(define_subst_attr "round_opnum" "round" "5" "6")
@@ -133,6 +142,7 @@
(define_subst_attr "round_saeonly_name" "round_saeonly" "" "_round")
(define_subst_attr "round_saeonly_mask_operand2" "mask" "%r2" "%r4")
(define_subst_attr "round_saeonly_mask_operand3" "mask" "%r3" "%r5")
+(define_subst_attr "round_saeonly_mask_operand4" "mask" "%r4" "%r6")
(define_subst_attr "round_saeonly_mask_scalar_merge_operand4" "mask_scalar_merge" "%r4" "%r5")
(define_subst_attr "round_saeonly_sd_mask_operand5" "sd" "%r5" "%r7")
(define_subst_attr "round_saeonly_op2" "round_saeonly" "" "%r2")
@@ -143,12 +153,17 @@
(define_subst_attr "round_saeonly_prefix" "round_saeonly" "vex" "evex")
(define_subst_attr "round_saeonly_mask_op2" "round_saeonly" "" "<round_saeonly_mask_operand2>")
(define_subst_attr "round_saeonly_mask_op3" "round_saeonly" "" "<round_saeonly_mask_operand3>")
+(define_subst_attr "round_saeonly_mask_op4" "round_saeonly" "" "<round_saeonly_mask_operand4>")
(define_subst_attr "round_saeonly_mask_scalar_merge_op4" "round_saeonly" "" "<round_saeonly_mask_scalar_merge_operand4>")
(define_subst_attr "round_saeonly_sd_mask_op5" "round_saeonly" "" "<round_saeonly_sd_mask_operand5>")
(define_subst_attr "round_saeonly_constraint" "round_saeonly" "vm" "v")
(define_subst_attr "round_saeonly_constraint2" "round_saeonly" "m" "v")
(define_subst_attr "round_saeonly_nimm_predicate" "round_saeonly" "nonimmediate_operand" "register_operand")
-(define_subst_attr "round_saeonly_mode512bit_condition" "round_saeonly" "1" "(<MODE>mode == V16SFmode || <MODE>mode == V8DFmode)")
+(define_subst_attr "round_saeonly_mode512bit_condition" "round_saeonly" "1" "(<MODE>mode == V16SFmode
+ || <MODE>mode == V8DFmode
+ || <MODE>mode == V8DImode
+ || <MODE>mode == V16SImode)")
+(define_subst_attr "round_saeonly_modev8sf_condition" "round_saeonly" "1" "(<MODE>mode == V8SFmode)")
(define_subst "round_saeonly"
[(set (match_operand:SUBST_A 0)
@@ -196,3 +211,19 @@
(match_dup 4)
(match_dup 5)
(unspec [(match_operand:SI 6 "const48_operand")] UNSPEC_EMBEDDED_ROUNDING)])
+
+(define_subst_attr "mask_expand4_name" "mask_expand4" "" "_mask")
+(define_subst_attr "mask_expand4_args" "mask_expand4" "" ", operands[4], operands[5]")
+
+(define_subst "mask_expand4"
+ [(match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1)
+ (match_operand:SUBST_V 2)
+ (match_operand:SI 3)]
+ "TARGET_AVX512VL"
+ [(match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_operand:SUBST_V 4 "vector_move_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")])
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index b205c3d466d..d642accd441 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index aa94a481327..6d3e24f8262 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 8f28921ca86..fd390f9157c 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -370,6 +370,210 @@
/* shaintrin.h */
#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
+/* TODO split */
+#define __builtin_ia32_pslldq512(A, B) __builtin_ia32_pslldq512(A, 8)
+#define __builtin_ia32_psrldq512(A, B) __builtin_ia32_psrldq512(A, 8)
+#define __builtin_ia32_alignd128_mask(A, B, F, D, E) __builtin_ia32_alignd128_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignd256_mask(A, B, F, D, E) __builtin_ia32_alignd256_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignq128_mask(A, B, F, D, E) __builtin_ia32_alignq128_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignq256_mask(A, B, F, D, E) __builtin_ia32_alignq256_mask(A, B, 1, D, E)
+#define __builtin_ia32_cmpb128_mask(A, B, E, D) __builtin_ia32_cmpb128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpb256_mask(A, B, E, D) __builtin_ia32_cmpb256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpb512_mask(A, B, E, D) __builtin_ia32_cmpb512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpd128_mask(A, B, E, D) __builtin_ia32_cmpd128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpd256_mask(A, B, E, D) __builtin_ia32_cmpd256_mask(A, B, 1, D)
+#define __builtin_ia32_cmppd128_mask(A, B, E, D) __builtin_ia32_cmppd128_mask(A, B, 1, D)
+#define __builtin_ia32_cmppd256_mask(A, B, E, D) __builtin_ia32_cmppd256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpps128_mask(A, B, E, D) __builtin_ia32_cmpps128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpps256_mask(A, B, E, D) __builtin_ia32_cmpps256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpq128_mask(A, B, E, D) __builtin_ia32_cmpq128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpq256_mask(A, B, E, D) __builtin_ia32_cmpq256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpw128_mask(A, B, E, D) __builtin_ia32_cmpw128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpw256_mask(A, B, E, D) __builtin_ia32_cmpw256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpw512_mask(A, B, E, D) __builtin_ia32_cmpw512_mask(A, B, 1, D)
+#define __builtin_ia32_cvtpd2qq512_mask(A, B, C, D) __builtin_ia32_cvtpd2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtpd2uqq512_mask(A, B, C, D) __builtin_ia32_cvtpd2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtps2qq512_mask(A, B, C, D) __builtin_ia32_cvtps2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtps2uqq512_mask(A, B, C, D) __builtin_ia32_cvtps2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtqq2pd512_mask(A, B, C, D) __builtin_ia32_cvtqq2pd512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtqq2ps512_mask(A, B, C, D) __builtin_ia32_cvtqq2ps512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qq512_mask(A, B, C, D) __builtin_ia32_cvttpd2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqq512_mask(A, B, C, D) __builtin_ia32_cvttpd2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttps2qq512_mask(A, B, C, D) __builtin_ia32_cvttps2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqq512_mask(A, B, C, D) __builtin_ia32_cvttps2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtuqq2pd512_mask(A, B, C, D) __builtin_ia32_cvtuqq2pd512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtuqq2ps512_mask(A, B, C, D) __builtin_ia32_cvtuqq2ps512_mask(A, B, C, 8)
+#define __builtin_ia32_dbpsadbw128_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw128_mask(A, B, 1, D, E)
+#define __builtin_ia32_dbpsadbw256_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw256_mask(A, B, 1, D, E)
+#define __builtin_ia32_dbpsadbw512_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw512_mask(A, B, 1, D, E)
+#define __builtin_ia32_extractf32x4_256_mask(A, E, C, D) __builtin_ia32_extractf32x4_256_mask(A, 1, C, D)
+#define __builtin_ia32_extractf32x8_mask(A, E, C, D) __builtin_ia32_extractf32x8_mask(A, 1, C, D)
+#define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D)
+#define __builtin_ia32_extractf64x2_512_mask(A, E, C, D) __builtin_ia32_extractf64x2_512_mask(A, 1, C, D)
+#define __builtin_ia32_extracti32x4_256_mask(A, E, C, D) __builtin_ia32_extracti32x4_256_mask(A, 1, C, D)
+#define __builtin_ia32_extracti32x8_mask(A, E, C, D) __builtin_ia32_extracti32x8_mask(A, 1, C, D)
+#define __builtin_ia32_extracti64x2_256_mask(A, E, C, D) __builtin_ia32_extracti64x2_256_mask(A, 1, C, D)
+#define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) __builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
+#define __builtin_ia32_fixupimmpd128(A, B, C, E) __builtin_ia32_fixupimmpd128(A, B, C, 1)
+#define __builtin_ia32_fixupimmpd128_mask(A, B, C, F, E) __builtin_ia32_fixupimmpd128_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmpd128_maskz(A, B, C, F, E) __builtin_ia32_fixupimmpd128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmpd256(A, B, C, E) __builtin_ia32_fixupimmpd256(A, B, C, 1)
+#define __builtin_ia32_fixupimmpd256_mask(A, B, C, F, E) __builtin_ia32_fixupimmpd256_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmpd256_maskz(A, B, C, F, E) __builtin_ia32_fixupimmpd256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps128(A, B, C, E) __builtin_ia32_fixupimmps128(A, B, C, 1)
+#define __builtin_ia32_fixupimmps128_mask(A, B, C, F, E) __builtin_ia32_fixupimmps128_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps128_maskz(A, B, C, F, E) __builtin_ia32_fixupimmps128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps256(A, B, C, E) __builtin_ia32_fixupimmps256(A, B, C, 1)
+#define __builtin_ia32_fixupimmps256_mask(A, B, C, F, E) __builtin_ia32_fixupimmps256_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps256_maskz(A, B, C, F, E) __builtin_ia32_fixupimmps256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fpclasspd128_mask(A, D, C) __builtin_ia32_fpclasspd128_mask(A, 1, C)
+#define __builtin_ia32_fpclasspd256_mask(A, D, C) __builtin_ia32_fpclasspd256_mask(A, 1, C)
+#define __builtin_ia32_fpclasspd512_mask(A, D, C) __builtin_ia32_fpclasspd512_mask(A, 1, C)
+#define __builtin_ia32_fpclassps128_mask(A, D, C) __builtin_ia32_fpclassps128_mask(A, 1, C)
+#define __builtin_ia32_fpclassps256_mask(A, D, C) __builtin_ia32_fpclassps256_mask(A, 1, C)
+#define __builtin_ia32_fpclassps512_mask(A, D, C) __builtin_ia32_fpclassps512_mask(A, 1, C)
+#define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
+#define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
+#define __builtin_ia32_gather3div2df(A, B, C, D, F) __builtin_ia32_gather3div2df(A, B, C, D, 1)
+#define __builtin_ia32_gather3div2di(A, B, C, D, F) __builtin_ia32_gather3div2di(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4df(A, B, C, D, F) __builtin_ia32_gather3div4df(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4di(A, B, C, D, F) __builtin_ia32_gather3div4di(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4sf(A, B, C, D, F) __builtin_ia32_gather3div4sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4si(A, B, C, D, F) __builtin_ia32_gather3div4si(A, B, C, D, 1)
+#define __builtin_ia32_gather3div8sf(A, B, C, D, F) __builtin_ia32_gather3div8sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3div8si(A, B, C, D, F) __builtin_ia32_gather3div8si(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv2df(A, B, C, D, F) __builtin_ia32_gather3siv2df(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv2di(A, B, C, D, F) __builtin_ia32_gather3siv2di(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4df(A, B, C, D, F) __builtin_ia32_gather3siv4df(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4di(A, B, C, D, F) __builtin_ia32_gather3siv4di(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4sf(A, B, C, D, F) __builtin_ia32_gather3siv4sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4si(A, B, C, D, F) __builtin_ia32_gather3siv4si(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv8sf(A, B, C, D, F) __builtin_ia32_gather3siv8sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv8si(A, B, C, D, F) __builtin_ia32_gather3siv8si(A, B, C, D, 1)
+#define __builtin_ia32_getmantpd128_mask(A, E, C, D) __builtin_ia32_getmantpd128_mask(A, 1, C, D)
+#define __builtin_ia32_getmantpd256_mask(A, E, C, D) __builtin_ia32_getmantpd256_mask(A, 1, C, D)
+#define __builtin_ia32_getmantps128_mask(A, E, C, D) __builtin_ia32_getmantps128_mask(A, 1, C, D)
+#define __builtin_ia32_getmantps256_mask(A, E, C, D) __builtin_ia32_getmantps256_mask(A, 1, C, D)
+#define __builtin_ia32_insertf32x4_256_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) __builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf64x2_256_mask(A, B, F, D, E) __builtin_ia32_insertf64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf64x2_512_mask(A, B, F, D, E) __builtin_ia32_insertf64x2_512_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti32x4_256_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti32x8_mask(A, B, F, D, E) __builtin_ia32_inserti32x8_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti64x2_256_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti64x2_512_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_512_mask(A, B, 1, D, E)
+#define __builtin_ia32_palignr128_mask(A, B, F, D, E) __builtin_ia32_palignr128_mask(A, B, 8, D, E)
+#define __builtin_ia32_palignr256_mask(A, B, F, D, E) __builtin_ia32_palignr256_mask(A, B, 8, D, E)
+#define __builtin_ia32_palignr512(A, B, D) __builtin_ia32_palignr512(A, B, 8)
+#define __builtin_ia32_palignr512_mask(A, B, F, D, E) __builtin_ia32_palignr512_mask(A, B, 8, D, E)
+#define __builtin_ia32_permdf256_mask(A, E, C, D) __builtin_ia32_permdf256_mask(A, 1, C, D)
+#define __builtin_ia32_permdi256_mask(A, E, C, D) __builtin_ia32_permdi256_mask(A, 1, C, D)
+#define __builtin_ia32_prold128_mask(A, E, C, D) __builtin_ia32_prold128_mask(A, 1, C, D)
+#define __builtin_ia32_prold256_mask(A, E, C, D) __builtin_ia32_prold256_mask(A, 1, C, D)
+#define __builtin_ia32_prolq128_mask(A, E, C, D) __builtin_ia32_prolq128_mask(A, 1, C, D)
+#define __builtin_ia32_prolq256_mask(A, E, C, D) __builtin_ia32_prolq256_mask(A, 1, C, D)
+#define __builtin_ia32_prord128_mask(A, E, C, D) __builtin_ia32_prord128_mask(A, 1, C, D)
+#define __builtin_ia32_prord256_mask(A, E, C, D) __builtin_ia32_prord256_mask(A, 1, C, D)
+#define __builtin_ia32_prorq128_mask(A, E, C, D) __builtin_ia32_prorq128_mask(A, 1, C, D)
+#define __builtin_ia32_prorq256_mask(A, E, C, D) __builtin_ia32_prorq256_mask(A, 1, C, D)
+#define __builtin_ia32_pshufd128_mask(A, E, C, D) __builtin_ia32_pshufd128_mask(A, 1, C, D)
+#define __builtin_ia32_pshufd256_mask(A, E, C, D) __builtin_ia32_pshufd256_mask(A, 1, C, D)
+#define __builtin_ia32_pshufhw128_mask(A, E, C, D) __builtin_ia32_pshufhw128_mask(A, 1, C, D)
+#define __builtin_ia32_pshufhw256_mask(A, E, C, D) __builtin_ia32_pshufhw256_mask(A, 1, C, D)
+#define __builtin_ia32_pshufhw512_mask(A, E, C, D) __builtin_ia32_pshufhw512_mask(A, 1, C, D)
+#define __builtin_ia32_pshuflw128_mask(A, E, C, D) __builtin_ia32_pshuflw128_mask(A, 1, C, D)
+#define __builtin_ia32_pshuflw256_mask(A, E, C, D) __builtin_ia32_pshuflw256_mask(A, 1, C, D)
+#define __builtin_ia32_pshuflw512_mask(A, E, C, D) __builtin_ia32_pshuflw512_mask(A, 1, C, D)
+#define __builtin_ia32_pslldi128_mask(A, E, C, D) __builtin_ia32_pslldi128_mask(A, 1, C, D)
+#define __builtin_ia32_pslldi256_mask(A, E, C, D) __builtin_ia32_pslldi256_mask(A, 1, C, D)
+#define __builtin_ia32_psllqi128_mask(A, E, C, D) __builtin_ia32_psllqi128_mask(A, 1, C, D)
+#define __builtin_ia32_psllqi256_mask(A, E, C, D) __builtin_ia32_psllqi256_mask(A, 1, C, D)
+#define __builtin_ia32_psllwi128_mask(A, E, C, D) __builtin_ia32_psllwi128_mask(A, 1, C, D)
+#define __builtin_ia32_psllwi256_mask(A, E, C, D) __builtin_ia32_psllwi256_mask(A, 1, C, D)
+#define __builtin_ia32_psllwi512_mask(A, E, C, D) __builtin_ia32_psllwi512_mask(A, 1, C, D)
+#define __builtin_ia32_psradi128_mask(A, E, C, D) __builtin_ia32_psradi128_mask(A, 1, C, D)
+#define __builtin_ia32_psradi256_mask(A, E, C, D) __builtin_ia32_psradi256_mask(A, 1, C, D)
+#define __builtin_ia32_psraqi128_mask(A, E, C, D) __builtin_ia32_psraqi128_mask(A, 1, C, D)
+#define __builtin_ia32_psraqi256_mask(A, E, C, D) __builtin_ia32_psraqi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrawi128_mask(A, E, C, D) __builtin_ia32_psrawi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrawi256_mask(A, E, C, D) __builtin_ia32_psrawi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrawi512_mask(A, E, C, D) __builtin_ia32_psrawi512_mask(A, 1, C, D)
+#define __builtin_ia32_psrldi128_mask(A, E, C, D) __builtin_ia32_psrldi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrldi256_mask(A, E, C, D) __builtin_ia32_psrldi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrlqi128_mask(A, E, C, D) __builtin_ia32_psrlqi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrlqi256_mask(A, E, C, D) __builtin_ia32_psrlqi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrlwi128_mask(A, E, C, D) __builtin_ia32_psrlwi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrlwi256_mask(A, E, C, D) __builtin_ia32_psrlwi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrlwi512_mask(A, E, C, D) __builtin_ia32_psrlwi512_mask(A, 1, C, D)
+#define __builtin_ia32_pternlogd128_mask(A, B, C, F, E) __builtin_ia32_pternlogd128_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd128_maskz(A, B, C, F, E) __builtin_ia32_pternlogd128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd256_mask(A, B, C, F, E) __builtin_ia32_pternlogd256_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd256_maskz(A, B, C, F, E) __builtin_ia32_pternlogd256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq128_mask(A, B, C, F, E) __builtin_ia32_pternlogq128_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq128_maskz(A, B, C, F, E) __builtin_ia32_pternlogq128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq256_mask(A, B, C, F, E) __builtin_ia32_pternlogq256_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq256_maskz(A, B, C, F, E) __builtin_ia32_pternlogq256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_rangepd128_mask(A, B, F, D, E) __builtin_ia32_rangepd128_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangepd256_mask(A, B, F, D, E) __builtin_ia32_rangepd256_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangepd512_mask(A, B, I, D, E, F) __builtin_ia32_rangepd512_mask(A, B, 1, D, E, 8)
+#define __builtin_ia32_rangeps128_mask(A, B, F, D, E) __builtin_ia32_rangeps128_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangeps256_mask(A, B, F, D, E) __builtin_ia32_rangeps256_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangeps512_mask(A, B, I, D, E, F) __builtin_ia32_rangeps512_mask(A, B, 1, D, E, 8)
+#define __builtin_ia32_rangesd128_round(A, B, I, F) __builtin_ia32_rangesd128_round(A, B, 1, 8)
+#define __builtin_ia32_rangess128_round(A, B, I, F) __builtin_ia32_rangess128_round(A, B, 1, 8)
+#define __builtin_ia32_reducepd128_mask(A, E, C, D) __builtin_ia32_reducepd128_mask(A, 1, C, D)
+#define __builtin_ia32_reducepd256_mask(A, E, C, D) __builtin_ia32_reducepd256_mask(A, 1, C, D)
+#define __builtin_ia32_reducepd512_mask(A, E, C, D) __builtin_ia32_reducepd512_mask(A, 1, C, D)
+#define __builtin_ia32_reduceps128_mask(A, E, C, D) __builtin_ia32_reduceps128_mask(A, 1, C, D)
+#define __builtin_ia32_reduceps256_mask(A, E, C, D) __builtin_ia32_reduceps256_mask(A, 1, C, D)
+#define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
+#define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1)
+#define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1)
+#define __builtin_ia32_rndscalepd_128_mask(A, E, C, D) __builtin_ia32_rndscalepd_128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalepd_256_mask(A, E, C, D) __builtin_ia32_rndscalepd_256_mask(A, 1, C, D)
+#define __builtin_ia32_rndscaleps_128_mask(A, E, C, D) __builtin_ia32_rndscaleps_128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscaleps_256_mask(A, E, C, D) __builtin_ia32_rndscaleps_256_mask(A, 1, C, D)
+#define __builtin_ia32_scatterdiv2df(A, B, C, D, F) __builtin_ia32_scatterdiv2df(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv2di(A, B, C, D, F) __builtin_ia32_scatterdiv2di(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4df(A, B, C, D, F) __builtin_ia32_scatterdiv4df(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4di(A, B, C, D, F) __builtin_ia32_scatterdiv4di(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4sf(A, B, C, D, F) __builtin_ia32_scatterdiv4sf(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4si(A, B, C, D, F) __builtin_ia32_scatterdiv4si(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8sf(A, B, C, D, F) __builtin_ia32_scatterdiv8sf(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8si(A, B, C, D, F) __builtin_ia32_scatterdiv8si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv2df(A, B, C, D, F) __builtin_ia32_scattersiv2df(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv2di(A, B, C, D, F) __builtin_ia32_scattersiv2di(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4df(A, B, C, D, F) __builtin_ia32_scattersiv4df(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4di(A, B, C, D, F) __builtin_ia32_scattersiv4di(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4sf(A, B, C, D, F) __builtin_ia32_scattersiv4sf(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4si(A, B, C, D, F) __builtin_ia32_scattersiv4si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8sf(A, B, C, D, F) __builtin_ia32_scattersiv8sf(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8si(A, B, C, D, F) __builtin_ia32_scattersiv8si(A, B, C, D, 1)
+#define __builtin_ia32_shuf_f32x4_256_mask(A, B, F, D, E) __builtin_ia32_shuf_f32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_f64x2_256_mask(A, B, F, D, E) __builtin_ia32_shuf_f64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i32x4_256_mask(A, B, F, D, E) __builtin_ia32_shuf_i32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i64x2_256_mask(A, B, F, D, E) __builtin_ia32_shuf_i64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufpd128_mask(A, B, F, D, E) __builtin_ia32_shufpd128_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufpd256_mask(A, B, F, D, E) __builtin_ia32_shufpd256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufps128_mask(A, B, F, D, E) __builtin_ia32_shufps128_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufps256_mask(A, B, F, D, E) __builtin_ia32_shufps256_mask(A, B, 1, D, E)
+#define __builtin_ia32_ucmpb128_mask(A, B, E, D) __builtin_ia32_ucmpb128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpb256_mask(A, B, E, D) __builtin_ia32_ucmpb256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpd128_mask(A, B, E, D) __builtin_ia32_ucmpd128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpd256_mask(A, B, E, D) __builtin_ia32_ucmpd256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpq128_mask(A, B, E, D) __builtin_ia32_ucmpq128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpq256_mask(A, B, E, D) __builtin_ia32_ucmpq256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpw128_mask(A, B, E, D) __builtin_ia32_ucmpw128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpw256_mask(A, B, E, D) __builtin_ia32_ucmpw256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D)
+#define __builtin_ia32_vcvtps2ph256_mask(A, E, C, D) __builtin_ia32_vcvtps2ph256_mask(A, 1, C, D)
+#define __builtin_ia32_vcvtps2ph_mask(A, E, C, D) __builtin_ia32_vcvtps2ph_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilpd256_mask(A, E, C, D) __builtin_ia32_vpermilpd256_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilpd_mask(A, E, C, D) __builtin_ia32_vpermilpd_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilps256_mask(A, E, C, D) __builtin_ia32_vpermilps256_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilps_mask(A, E, C, D) __builtin_ia32_vpermilps_mask(A, 1, C, D)
+
#include <wmmintrin.h>
#include <immintrin.h>
#include <mm3dnow.h>
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-check.h b/gcc/testsuite/gcc.target/i386/avx512bw-check.h
new file mode 100644
index 00000000000..4cae3092fe7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-check.h
@@ -0,0 +1,47 @@
+#include <stdlib.h>
+#include "cpuid.h"
+#include "m512-check.h"
+#include "avx512f-os-support.h"
+
+static void avx512bw_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{
+ avx512bw_test ();
+}
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ /* Run AVX512BW test only if host has AVX512BW support. */
+ if ((ecx & bit_OSXSAVE) == (bit_OSXSAVE))
+ {
+ if (__get_cpuid_max (0, NULL) < 7)
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ if ((avx512f_os_support ()) && ((ebx & bit_AVX512BW) == bit_AVX512BW))
+ {
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ return 0;
+ }
+#ifdef DEBUG
+ printf ("SKIPPED\n");
+#endif
+ }
+#ifdef DEBUG
+ else
+ printf ("SKIPPED\n");
+#endif
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-1.c
new file mode 100644
index 00000000000..16fce46f8e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kunpckdq\[ \\t\]+\[^\n\]*%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test () {
+ __mmask64 k1, k2, k3;
+ volatile __m512i x;
+
+ __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+ __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+ k3 = _mm512_kunpackd (k1, k2);
+ x = _mm512_mask_avg_epu8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c
new file mode 100644
index 00000000000..eece5e8f264
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kunpckwd\[ \\t\]+\[^\n\]*%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test () {
+ volatile __mmask32 k1, k2, k3;
+ volatile __m256i x;
+
+ __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+ __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+ k3 = _mm512_kunpackw (k1, k2);
+ //x = _mm256_mask_avg_epu8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vdbpsadbw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vdbpsadbw-1.c
new file mode 100644
index 00000000000..3a6522cbee7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vdbpsadbw-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vdbpsadbw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdbpsadbw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdbpsadbw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vdbpsadbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdbpsadbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdbpsadbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vdbpsadbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdbpsadbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdbpsadbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, a;
+volatile __m256i y, b;
+volatile __m128i z, c;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_dbsad_epu8 (a, a, 0xaa);
+ x = _mm512_mask_dbsad_epu8 (x, m1, a, a, 0xaa);
+ x = _mm512_maskz_dbsad_epu8 (m1, a, a, 0xaa);
+ y = _mm256_dbsad_epu8 (b, b, 0xbb);
+ y = _mm256_mask_dbsad_epu8 (y, m2, b, b, 0xbb);
+ y = _mm256_maskz_dbsad_epu8 (m2, b, b, 0xbb);
+ z = _mm_dbsad_epu8 (c, c, 0xcc);
+ z = _mm_mask_dbsad_epu8 (z, m3, c, c, 0xcc);
+ z = _mm_maskz_dbsad_epu8 (m3, c, c, 0xcc);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vdbpsadbw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vdbpsadbw-2.c
new file mode 100644
index 00000000000..cbd50d3a7ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vdbpsadbw-2.c
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (unsigned short *dst, unsigned char *src1, unsigned char *src2,
+ int imm)
+{
+ int i, j, k, part, power;
+ unsigned char tmp[2 * SIZE];
+
+ for (i = 0; i < 2 * SIZE; i += 16)
+ {
+ for (j = 0; j < 4; j++)
+ {
+ power = 1;
+ for (k = 0; k < j; k++)
+ power *= 4;
+ part = (imm & (3 * power)) >> (2 * j);
+ for (k = 0; k < 4; k++)
+ tmp[i + 4 * j + k] = src2[i + 4 * part + k];
+ }
+ }
+
+ for (i = 0; i < SIZE; i += 4)
+ {
+ dst[i] = dst[i + 1] = dst[i + 2] = dst[i + 3] = 0;
+ for (j = 0; j < 4; j++)
+ {
+ dst[i] += abs (src1[2 * i + j] - tmp[2 * i + j]);
+ dst[i + 1] += abs (src1[2 * i + j] - tmp[2 * i + j + 1]);
+ dst[i + 2] += abs (src1[2 * i + j + 4] - tmp[2 * i + j + 2]);
+ dst[i + 3] += abs (src1[2 * i + j + 4] - tmp[2 * i + j + 3]);
+ }
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_b) src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE];
+ int imm = 0x22;
+
+ sign = -1;
+ for (i = 0; i < 2*SIZE; i++)
+ {
+ src1.a[i] = 1 + 34 * i * sign;
+ src2.a[i] = 179 - i;
+ sign = sign * -1;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_dbsad_epu8) (src1.x, src2.x, imm);
+ res2.x = INTRINSIC (_mask_dbsad_epu8) (res2.x, mask, src1.x, src2.x, imm);
+ res3.x = INTRINSIC (_maskz_dbsad_epu8) (mask, src1.x, src2.x, imm);
+
+ CALC (res_ref, src1.a, src2.a, imm);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c
new file mode 100644
index 00000000000..96682a56118
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+short *p;
+volatile __m512i x1, yy;
+volatile __m256i x2, y2;
+volatile __m128i x3, y3;
+volatile __mmask32 m32;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx512bw_test (void)
+{
+ x1 = _mm512_mask_mov_epi16 (x1, m32, yy);
+ x2 = _mm256_mask_mov_epi16 (x2, m16, y2);
+ x3 = _mm_mask_mov_epi16 (x3, m8, y3);
+
+ x1 = _mm512_maskz_mov_epi16 (m32, yy);
+ x2 = _mm256_maskz_mov_epi16 (m16, y2);
+ x3 = _mm_maskz_mov_epi16 (m8, y3);
+
+ x1 = _mm512_mask_loadu_epi16 (x1, m32, p);
+ x2 = _mm256_mask_loadu_epi16 (x2, m16, p);
+ x3 = _mm_mask_loadu_epi16 (x3, m8, p);
+
+ x1 = _mm512_maskz_loadu_epi16 (m32, p);
+ x2 = _mm256_maskz_loadu_epi16 (m16, p);
+ x3 = _mm_maskz_loadu_epi16 (m8, p);
+
+ _mm512_mask_storeu_epi16 (p, m32, x1);
+ _mm256_mask_storeu_epi16 (p, m16, x2);
+ _mm_mask_storeu_epi16 (p, m8, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-2.c
new file mode 100644
index 00000000000..48a59b84824
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE ((AVX512F_LEN) / 16)
+#include "avx512f-mask-type.h"
+
+typedef struct
+{
+ char c;
+ short a[SIZE];
+} __attribute__ ((packed)) EVAL(unaligned_array, AVX512F_LEN,);
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s3, res1, res2, res3, res4;
+ EVAL(unaligned_array, AVX512F_LEN,) s2, res5;
+ MASK_TYPE mask = MASK_VALUE;
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 123 * i * sign;
+ s2.a[i] = 456 * i * sign;
+ s3.a[i] = 789 * i * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ res5.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_mov_epi16) (res1.x, mask, s1.x);
+ res2.x = INTRINSIC (_maskz_mov_epi16) (mask, s1.x);
+ res3.x = INTRINSIC (_mask_loadu_epi16) (res3.x, mask, s2.a);
+ res4.x = INTRINSIC (_maskz_loadu_epi16) (mask, s2.a);
+ INTRINSIC (_mask_storeu_epi16) (res5.a, mask, s3.x);
+
+ MASK_MERGE (i_w) (s1.a, mask, SIZE);
+ if (checkVs (res1.a, s1.a, SIZE))
+ abort ();
+
+ MASK_ZERO (i_w) (s1.a, mask, SIZE);
+ if (checkVs (res2.a, s1.a, SIZE))
+ abort ();
+
+ MASK_MERGE (i_w) (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, s2.a))
+ abort ();
+
+ MASK_ZERO (i_w) (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res4, s2.a))
+ abort ();
+
+ MASK_MERGE (i_w) (s3.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (s3, res5.a))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c
new file mode 100644
index 00000000000..8856a21f126
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*\\)\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+char *p;
+volatile __m512i x1, yy;
+volatile __m256i x2, y2;
+volatile __m128i x3, y3;
+volatile __mmask64 m64;
+volatile __mmask32 m32;
+volatile __mmask16 m16;
+
+void extern
+avx512bw_test (void)
+{
+ x1 = _mm512_mask_mov_epi8 (x1, m64, yy);
+ x2 = _mm256_mask_mov_epi8 (x2, m32, y2);
+ x3 = _mm_mask_mov_epi8 (x3, m16, y3);
+
+ x1 = _mm512_maskz_mov_epi8 (m64, yy);
+ x2 = _mm256_maskz_mov_epi8 (m32, y2);
+ x3 = _mm_maskz_mov_epi8 (m16, y3);
+
+ x1 = _mm512_mask_loadu_epi8 (x1, m64, p);
+ x2 = _mm256_mask_loadu_epi8 (x2, m32, p);
+ x3 = _mm_mask_loadu_epi8 (x3, m16, p);
+
+ x1 = _mm512_maskz_loadu_epi8 (m64, p);
+ x2 = _mm256_maskz_loadu_epi8 (m32, p);
+ x3 = _mm_maskz_loadu_epi8 (m16, p);
+
+ _mm512_mask_storeu_epi8 (p, m64, x1);
+ _mm256_mask_storeu_epi8 (p, m32, x2);
+ _mm_mask_storeu_epi8 (p, m16, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-2.c
new file mode 100644
index 00000000000..4c65cf54e43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE ((AVX512F_LEN) / 8)
+#include "avx512f-mask-type.h"
+
+typedef struct
+{
+ char c;
+ char a[SIZE];
+} __attribute__ ((packed)) EVAL(unaligned_array, AVX512F_LEN,);
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_b) s1, s3, res1, res2, res3, res4;
+ EVAL(unaligned_array, AVX512F_LEN,) s2, res5;
+ MASK_TYPE mask = MASK_VALUE;
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = (i + 1) * sign;
+ s2.a[i] = (i + 2) * sign;
+ s3.a[i] = (i * 2) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ res5.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_mask_mov_epi8) (res1.x, mask, s1.x);
+ res2.x = INTRINSIC (_maskz_mov_epi8) (mask, s1.x);
+ res3.x = INTRINSIC (_mask_loadu_epi8) (res3.x, mask, s2.a);
+ res4.x = INTRINSIC (_maskz_loadu_epi8) (mask, s2.a);
+ INTRINSIC (_mask_storeu_epi8) (res5.a, mask, s3.x);
+
+ MASK_MERGE (i_b) (s1.a, mask, SIZE);
+ if (checkVc (res1.a, s1.a, SIZE))
+ abort ();
+
+ MASK_ZERO (i_b) (s1.a, mask, SIZE);
+ if (checkVc (res2.a, s1.a, SIZE))
+ abort ();
+
+ MASK_MERGE (i_b) (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, s2.a))
+ abort ();
+
+ MASK_ZERO (i_b) (s2.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res4, s2.a))
+ abort ();
+
+ MASK_MERGE (i_b) (s3.a, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (s3, res5.a))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpabsb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpabsb-1.c
new file mode 100644
index 00000000000..298b9ef2c08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpabsb-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpabsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpabsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpabsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpabsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask64 m1;
+volatile __mmask32 m2;
+volatile __mmask16 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_abs_epi8 (z);
+ z = _mm512_mask_abs_epi8 (z, m1, z);
+ z = _mm512_maskz_abs_epi8 (m1, z);
+ y = _mm256_mask_abs_epi8 (y, m2, y);
+ y = _mm256_maskz_abs_epi8 (m2, y);
+ x = _mm_mask_abs_epi8 (x, m3, x);
+ x = _mm_maskz_abs_epi8 (m3, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpabsb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpabsb-2.c
new file mode 100644
index 00000000000..9cd6ce18b8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpabsb-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+void
+CALC (char *s, char *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ if (s[i] < 0)
+ r[i] = -s[i];
+ else
+ r[i] = s[i];
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_b) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = i * 7 + (i << 15) + 356;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (s.a, res_ref);
+
+ res1.x = INTRINSIC (_abs_epi8) (s.x);
+ res2.x = INTRINSIC (_mask_abs_epi8) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_abs_epi8) (mask, s.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpabsw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpabsw-1.c
new file mode 100644
index 00000000000..73a3af1eee5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpabsw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpabsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpabsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpabsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpabsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_abs_epi16 (z);
+ z = _mm512_mask_abs_epi16 (z, m1, z);
+ z = _mm512_maskz_abs_epi16 (m1, z);
+ y = _mm256_mask_abs_epi16 (y, m2, y);
+ y = _mm256_maskz_abs_epi16 (m2, y);
+ x = _mm_mask_abs_epi16 (x, m3, x);
+ x = _mm_maskz_abs_epi16 (m3, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpabsw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpabsw-2.c
new file mode 100644
index 00000000000..07e9bfea64f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpabsw-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *s, short *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ if (s[i] < 0)
+ r[i] = -s[i];
+ else
+ r[i] = s[i];
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = i * 7 + (i << 15) + 356;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ CALC (s.a, res_ref);
+
+ res1.x = INTRINSIC (_abs_epi16) (s.x);
+ res2.x = INTRINSIC (_mask_abs_epi16) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_abs_epi16) (mask, s.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpackssdw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpackssdw-1.c
new file mode 100644
index 00000000000..71ff18f881c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpackssdw-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpackssdw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpackssdw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpackssdw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpackssdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpackssdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpackssdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpackssdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask64 mx;
+volatile __mmask32 my;
+volatile __mmask16 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_packs_epi32 (x, x);
+ x = _mm512_mask_packs_epi32 (x, mx, x, x);
+ x = _mm512_maskz_packs_epi32 (mx, x, x);
+ y = _mm256_mask_packs_epi32 (y, my, y, y);
+ y = _mm256_maskz_packs_epi32 (my, y, y);
+ z = _mm_mask_packs_epi32 (z, mz, z, z);
+ z = _mm_maskz_packs_epi32 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpackssdw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpackssdw-2.c
new file mode 100644
index 00000000000..3a9f5c0b4fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpackssdw-2.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define DST_SIZE (AVX512F_LEN / 16)
+#define SRC_SIZE (AVX512F_LEN / 32)
+
+#include "limits.h"
+
+#include "avx512f-mask-type.h"
+
+static short
+EVAL(int_to_short, AVX512F_LEN,) (int iVal)
+{
+ short sVal;
+
+ if (iVal < SHRT_MIN)
+ sVal = SHRT_MIN;
+ else if (iVal > SHRT_MAX)
+ sVal = SHRT_MAX;
+ else
+ sVal = iVal;
+
+ return sVal;
+}
+
+void
+CALC (int *src1, int *src2, short *dst)
+{
+ int i;
+ int *ptr;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ ptr = (i / 4) % 2 ? src2 : src1;
+ dst[i] = EVAL(int_to_short, AVX512F_LEN,) (ptr[i % 4 + (i / 8) * 4]);
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ short dst_ref[DST_SIZE];
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s1.a[i] = i + 10;
+ s2.a[i] = i + 15;
+ }
+
+ res1.x = INTRINSIC (_packs_epi32) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_packs_epi32) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_packs_epi32) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (dst_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, dst_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (dst_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, dst_ref))
+ abort ();
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpacksswb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpacksswb-1.c
new file mode 100644
index 00000000000..251867d45b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpacksswb-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpacksswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpacksswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpacksswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpacksswb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpacksswb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpacksswb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpacksswb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask64 mx;
+volatile __mmask32 my;
+volatile __mmask16 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_packs_epi16 (x, x);
+ x = _mm512_mask_packs_epi16 (x, mx, x, x);
+ x = _mm512_maskz_packs_epi16 (mx, x, x);
+ y = _mm256_mask_packs_epi16 (y, my, y, y);
+ y = _mm256_maskz_packs_epi16 (my, y, y);
+ z = _mm_mask_packs_epi16 (z, mz, z, z);
+ z = _mm_maskz_packs_epi16 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpacksswb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpacksswb-2.c
new file mode 100644
index 00000000000..734f3ffe047
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpacksswb-2.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define DST_SIZE (AVX512F_LEN / 8)
+#define SRC_SIZE (AVX512F_LEN / 16)
+
+#include "limits.h"
+
+#include "avx512f-mask-type.h"
+
+static char
+EVAL(short_to_char, AVX512F_LEN,) (short iVal)
+{
+ char sVal;
+
+ if (iVal < CHAR_MIN)
+ sVal = CHAR_MIN;
+ else if (iVal > CHAR_MAX)
+ sVal = CHAR_MAX;
+ else
+ sVal = iVal;
+
+ return sVal;
+}
+
+void
+CALC (short *src1, short *src2, char *dst)
+{
+ int i;
+ short *ptr;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ ptr = (i / 8) % 2 ? src2 : src1;
+ dst[i] = EVAL(short_to_char, AVX512F_LEN,) (ptr[i % 8 + (i / 16) * 8]);
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ char dst_ref[DST_SIZE];
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s1.a[i] = i + 10;
+ s2.a[i] = i + 15;
+ }
+
+ res1.x = INTRINSIC (_packs_epi16) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_packs_epi16) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_packs_epi16) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (dst_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, dst_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (dst_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, dst_ref))
+ abort ();
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpackusdw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpackusdw-1.c
new file mode 100644
index 00000000000..b6d0166c0d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpackusdw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpackusdw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpackusdw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpackusdw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpackusdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpackusdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpackusdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpackusdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask64 mx;
+volatile __mmask32 my;
+volatile __mmask16 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_packus_epi32 (x, x);
+ x = _mm512_mask_packus_epi32 (x, mx, x, x);
+ x = _mm512_maskz_packus_epi32 (mx, x, x);
+ y = _mm256_mask_packus_epi32 (y, my, y, y);
+ y = _mm256_maskz_packus_epi32 (my, y, y);
+ z = _mm_mask_packus_epi32 (z, mz, z, z);
+ z = _mm_maskz_packus_epi32 (mz, z, z);
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpackusdw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpackusdw-2.c
new file mode 100644
index 00000000000..2083b59a837
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpackusdw-2.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define DST_SIZE (AVX512F_LEN / 16)
+#define SRC_SIZE (AVX512F_LEN / 32)
+
+#include "limits.h"
+
+#include "avx512f-mask-type.h"
+
+static unsigned short
+EVAL(int_to_ushort, AVX512F_LEN,) (int iVal)
+{
+ unsigned short sVal;
+
+ if (iVal < 0)
+ sVal = 0;
+ else if (iVal > USHRT_MAX)
+ sVal = USHRT_MAX;
+ else
+ sVal = iVal;
+
+ return sVal;
+}
+
+void
+CALC (int *src1, int *src2, unsigned short *dst)
+{
+ int i;
+ int *ptr;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ ptr = (i / 4) % 2 ? src2 : src1;
+ dst[i] = EVAL(int_to_ushort, AVX512F_LEN,) (ptr[i % 4 + (i / 8) * 4]);
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_d) s1, s2;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short dst_ref[DST_SIZE];
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s1.a[i] = i + 10;
+ s2.a[i] = i + 15;
+ }
+
+ res1.x = INTRINSIC (_packus_epi32) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_packus_epi32) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_packus_epi32) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (dst_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, dst_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (dst_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, dst_ref))
+ abort ();
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpackuswb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpackuswb-1.c
new file mode 100644
index 00000000000..69135cb9bc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpackuswb-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpackuswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpackuswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpackuswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpackuswb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpackuswb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpackuswb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpackuswb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask64 mx;
+volatile __mmask32 my;
+volatile __mmask16 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_packus_epi16 (x, x);
+ x = _mm512_mask_packus_epi16 (x, mx, x, x);
+ x = _mm512_maskz_packus_epi16 (mx, x, x);
+ y = _mm256_mask_packus_epi16 (y, my, y, y);
+ y = _mm256_maskz_packus_epi16 (my, y, y);
+ z = _mm_mask_packus_epi16 (z, mz, z, z);
+ z = _mm_maskz_packus_epi16 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpackuswb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpackuswb-2.c
new file mode 100644
index 00000000000..5a8d796fd1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpackuswb-2.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define DST_SIZE (AVX512F_LEN / 8)
+#define SRC_SIZE (AVX512F_LEN / 16)
+
+#include "limits.h"
+
+#include "avx512f-mask-type.h"
+
+static unsigned char
+EVAL(short_to_uchar, AVX512F_LEN,) (short iVal)
+{
+ unsigned char sVal;
+
+ if (iVal < 0)
+ sVal = 0;
+ else if (iVal > UCHAR_MAX)
+ sVal = UCHAR_MAX;
+ else
+ sVal = iVal;
+
+ return sVal;
+}
+
+void
+CALC (short *src1, short *src2, unsigned char *dst)
+{
+ int i;
+ short *ptr;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ ptr = (i / 8) % 2 ? src2 : src1;
+ dst[i] = EVAL(short_to_uchar, AVX512F_LEN,) (ptr[i % 8 + (i / 16) * 8]);
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char dst_ref[DST_SIZE];
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s1.a[i] = i + 10;
+ s2.a[i] = i + 15;
+ }
+
+ res1.x = INTRINSIC (_packus_epi16) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_packus_epi16) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_packus_epi16) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (dst_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, dst_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (dst_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, dst_ref))
+ abort ();
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddb-1.c
new file mode 100644
index 00000000000..6e106f2bc82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddb-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpaddb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpaddb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask64 m512;
+volatile __mmask32 m256;
+volatile __mmask16 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_add_epi8 (x512, x512);
+ x512 = _mm512_mask_add_epi8 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_add_epi8 (m512, x512, x512);
+ x256 = _mm256_mask_add_epi8 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_add_epi8 (m256, x256, x256);
+ x128 = _mm_mask_add_epi8 (x128, m128, x128, x128);
+ x128 = _mm_maskz_add_epi8 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddb-2.c
new file mode 100644
index 00000000000..0419026bbf6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddb-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+void CALC (char *r, char *s1, char *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] + s2[i];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ src2.a[i] = 3 + sign * 11 * (i % 377) * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_add_epi8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_add_epi8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_add_epi8) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsb-1.c
new file mode 100644
index 00000000000..a2352b6e762
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsb-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpaddsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpaddsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask64 m512;
+volatile __mmask32 m256;
+volatile __mmask16 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_adds_epi8 (x512, x512);
+ x512 = _mm512_mask_adds_epi8 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_adds_epi8 (m512, x512, x512);
+ x256 = _mm256_mask_adds_epi8 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_adds_epi8 (m256, x256, x256);
+ x128 = _mm_mask_adds_epi8 (x128, m128, x128, x128);
+ x128 = _mm_maskz_adds_epi8 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsb-2.c
new file mode 100644
index 00000000000..b379b8b1201
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsb-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (char *r, char *s1, char *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ int tmp = (int)s1[i] + (int)s2[i];
+ if (tmp > 0x7F) tmp = 0x7F;
+ if (tmp < (char)0x80) tmp = (char)0x80;
+ r[i] = (char)tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ src2.a[i] = 3 + sign * 11 * (i % 377) * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_adds_epi8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_adds_epi8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_adds_epi8) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsw-1.c
new file mode 100644
index 00000000000..187b0e97860
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpaddsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpaddsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask32 m512;
+volatile __mmask16 m256;
+volatile __mmask8 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_adds_epi16 (x512, x512);
+ x512 = _mm512_mask_adds_epi16 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_adds_epi16 (m512, x512, x512);
+ x256 = _mm256_mask_adds_epi16 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_adds_epi16 (m256, x256, x256);
+ x128 = _mm_mask_adds_epi16 (x128, m128, x128, x128);
+ x128 = _mm_maskz_adds_epi16 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsw-2.c
new file mode 100644
index 00000000000..f07ad9c9e6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddsw-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (short *r, short *s1, short *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ int tmp = (int)s1[i] + (int)s2[i];
+ if (tmp > 0x7FFF) tmp = 0x7FFF;
+ if (tmp < (short)0x8000) tmp = (short)0x8000;
+ r[i] = (short)tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ src2.a[i] = 3 + sign * 11 * (i % 377) * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_adds_epi16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_adds_epi16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_adds_epi16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusb-1.c
new file mode 100644
index 00000000000..e1036cb019e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusb-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpaddusb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpaddusb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddusb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddusb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddusb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddusb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddusb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddusb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddusb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask64 m512;
+volatile __mmask32 m256;
+volatile __mmask16 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_adds_epu8 (x512, x512);
+ x512 = _mm512_mask_adds_epu8 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_adds_epu8 (m512, x512, x512);
+ x256 = _mm256_mask_adds_epu8 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_adds_epu8 (m256, x256, x256);
+ x128 = _mm_mask_adds_epu8 (x128, m128, x128, x128);
+ x128 = _mm_maskz_adds_epu8 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusb-2.c
new file mode 100644
index 00000000000..728968557b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusb-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (unsigned char *r, unsigned char *s1, unsigned char *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ int tmp = (int)s1[i] + (int)s2[i];
+ if (tmp > 0xFF) tmp = 0xFF;
+ if (tmp < 0) tmp = 0;
+ r[i] = (unsigned char)tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ src2.a[i] = 3 + 11 * (i % 377) * i;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_adds_epu8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_adds_epu8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_adds_epu8) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusw-1.c
new file mode 100644
index 00000000000..95520c629b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpaddusw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpaddusw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddusw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddusw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddusw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddusw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddusw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddusw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddusw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask32 m512;
+volatile __mmask16 m256;
+volatile __mmask8 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_adds_epu16 (x512, x512);
+ x512 = _mm512_mask_adds_epu16 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_adds_epu16 (m512, x512, x512);
+ x256 = _mm256_mask_adds_epu16 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_adds_epu16 (m256, x256, x256);
+ x128 = _mm_mask_adds_epu16 (x128, m128, x128, x128);
+ x128 = _mm_maskz_adds_epu16 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusw-2.c
new file mode 100644
index 00000000000..4dad97a2707
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddusw-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (unsigned short *r, unsigned short *s1, unsigned short *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ int tmp = (int)s1[i] + (int)s2[i];
+ if (tmp > 0xFFFF) tmp = 0xFFFF;
+ if (tmp < 0) tmp = 0;
+ r[i] = (unsigned short)tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ src2.a[i] = 3 + 11 * (i % 377) * i;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_adds_epu16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_adds_epu16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_adds_epu16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddw-1.c
new file mode 100644
index 00000000000..86029ea6094
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpaddw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpaddw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask32 m512;
+volatile __mmask16 m256;
+volatile __mmask8 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_add_epi16 (x512, x512);
+ x512 = _mm512_mask_add_epi16 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_add_epi16 (m512, x512, x512);
+ x256 = _mm256_mask_add_epi16 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_add_epi16 (m256, x256, x256);
+ x128 = _mm_mask_add_epi16 (x128, m128, x128, x128);
+ x128 = _mm_maskz_add_epi16 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpaddw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddw-2.c
new file mode 100644
index 00000000000..5edc979ed71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpaddw-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (short *r, short *s1, short *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] + s2[i];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ src2.a[i] = 3 + sign * 11 * (i % 377) * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_add_epi16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_add_epi16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_add_epi16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1.c
new file mode 100644
index 00000000000..c609365f89b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpalignr\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask64 m1;
+volatile __mmask32 m2;
+volatile __mmask16 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_alignr_epi8 (z, z, 10);
+ z = _mm512_mask_alignr_epi8 (z, m1, z, z, 10);
+ z = _mm512_maskz_alignr_epi8 (m1, z, z, 10);
+ y = _mm256_mask_alignr_epi8 (y, m2, y, y, 10);
+ y = _mm256_maskz_alignr_epi8 (m2, y, y, 10);
+ x = _mm_mask_alignr_epi8 (x, m3, x, x, 10);
+ x = _mm_maskz_alignr_epi8 (m3, x, x, 10);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-2.c
new file mode 100644
index 00000000000..4de6e05db79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpalignr-2.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#include <string.h>
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+#define N 0x3
+
+void
+CALC (char *src1, char *src2, char * dst)
+{
+ /* result for EVEX.U1.512 version consists from 4 result block, each of them
+ * has length of 128 bits. */
+ unsigned block_len = 16;
+ unsigned double_block_len = 32;
+ unsigned shift = 0;
+ char buf[double_block_len];
+ char *bout = dst;
+ int bits, i;
+
+ for (bits = 0; bits < AVX512F_LEN; bits += 128)
+ {
+ memcpy (&buf[0], src2 + shift, block_len);
+ memcpy (&buf[block_len], src1 + shift, block_len);
+
+ for (i = 0; i < block_len; i++)
+ /* shift counts larger than 32 produces zero result. */
+ if (N >= 32 || N + i >= 32)
+ bout[i] = 0;
+ else
+ bout[i] = buf[N + i];
+
+ shift += block_len;
+ bout += block_len;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_b) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i;
+ s2.a[i] = i * 2;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_alignr_epi8) (s1.x, s2.x, N);
+ res2.x = INTRINSIC (_mask_alignr_epi8) (res2.x, mask, s1.x, s2.x, N);
+ res3.x = INTRINSIC (_maskz_alignr_epi8) (mask, s1.x, s2.x, N);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpavgb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpavgb-1.c
new file mode 100644
index 00000000000..266a1bfa012
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpavgb-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpavgb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpavgb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpavgb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpavgb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpavgb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpavgb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpavgb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpavgb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpavgb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+volatile __m256i y;
+volatile __m512i z;
+volatile __mmask64 m1;
+volatile __mmask32 m2;
+volatile __mmask16 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_avg_epu8 (z, z);
+ z = _mm512_mask_avg_epu8 (z, m1, z, z);
+ z = _mm512_maskz_avg_epu8 (m1, z, z);
+ y = _mm256_mask_avg_epu8 (y, m2, y, y);
+ y = _mm256_maskz_avg_epu8 (m2, y, y);
+ x = _mm_mask_avg_epu8 (x, m3, x, x);
+ x = _mm_maskz_avg_epu8 (m3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpavgb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpavgb-2.c
new file mode 100644
index 00000000000..2dabd719478
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpavgb-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+void
+CALC (char *s1, char *s2, char *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = ((unsigned char) s1[i] +
+ (unsigned char) s2[i] + 1) >> 1;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_b) s1, s2, res1, res2 ,res3;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i + 15;
+ s2.a[i] = i + 14;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_avg_epu8) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_avg_epu8) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_avg_epu8) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpavgw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpavgw-1.c
new file mode 100644
index 00000000000..3b00784bbad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpavgw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpavgw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpavgw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpavgw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpavgw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpavgw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpavgw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpavgw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpavgw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpavgw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+volatile __m256i y;
+volatile __m512i z;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_avg_epu16 (z, z);
+ z = _mm512_mask_avg_epu16 (z, m1, z, z);
+ z = _mm512_maskz_avg_epu16 (m1, z, z);
+ y = _mm256_mask_avg_epu16 (y, m2, y, y);
+ y = _mm256_maskz_avg_epu16 (m2, y, y);
+ x = _mm_mask_avg_epu16 (x, m3, x, x);
+ x = _mm_maskz_avg_epu16 (m3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpavgw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpavgw-2.c
new file mode 100644
index 00000000000..51496865d64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpavgw-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *s1, short *s2, short *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = ((unsigned short) s1[i] +
+ (unsigned short) s2[i] + 1) >> 1;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res1, res2 ,res3;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i + 15;
+ s2.a[i] = i + 14;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_avg_epu16) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_avg_epu16) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_avg_epu16) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmb-1.c
new file mode 100644
index 00000000000..d001c27c388
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmb-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "(vpblendmb|vmovdqu8)\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "(vpblendmb|vmovdqu8)\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "(vpblendmb|vmovdqu8)\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i xx;
+volatile __m512i xxx;
+volatile __mmask8 m;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm256_mask_blend_epi8 (m, x, x);
+ xx = _mm_mask_blend_epi8 (m, xx, xx);
+ xxx = _mm512_mask_blend_epi8 (m, xxx, xxx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmb-2.c
new file mode 100644
index 00000000000..738093cdb78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmb-2.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (char *r, char *s1, char *s2, MASK_TYPE mask)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (mask & (1LL << i)) ? s2[i] : s1[i];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 15 + 46 * i * sign;
+ src2.a[i] = -22 + i * sign;
+ sign = sign * -1;
+ }
+
+ res1.x = INTRINSIC (_mask_blend_epi8) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a, mask);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmw-1.c
new file mode 100644
index 00000000000..65431cdea41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmw-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "(vpblendmw|vmovdqu16)\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "(vpblendmw|vmovdqu16)\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "(vpblendmw|vmovdqu16)\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i xx;
+volatile __m512i xxx;
+volatile __mmask8 m;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm256_mask_blend_epi16 (m, x, x);
+ xx = _mm_mask_blend_epi16 (m, xx, xx);
+ xxx = _mm512_mask_blend_epi16 (m, xxx, xxx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmw-2.c
new file mode 100644
index 00000000000..1877e6b9b65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpblendmw-2.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (short *r, short *s1, short *s2, MASK_TYPE mask)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (mask & (1 << i)) ? s2[i] : s1[i];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 15 + 3467 * i * sign;
+ src2.a[i] = -2217 * i * sign;
+ sign = sign * -1;
+ }
+
+ res1.x = INTRINSIC (_mask_blend_epi16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a, mask);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastb-1.c
new file mode 100644
index 00000000000..1880105a701
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastb-1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile char w;
+volatile __mmask64 mx;
+volatile __mmask32 my;
+volatile __mmask16 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_broadcastb_epi8 (z);
+ x = _mm512_mask_broadcastb_epi8 (x, mx, z);
+ x = _mm512_maskz_broadcastb_epi8 (mx, z);
+ y = _mm256_mask_broadcastb_epi8 (y, my, z);
+ y = _mm256_maskz_broadcastb_epi8 (my, z);
+ z = _mm_mask_broadcastb_epi8 (z, mz, z);
+ z = _mm_maskz_broadcastb_epi8 (mz, z);
+
+ x = _mm512_set1_epi8 (w);
+ x = _mm512_mask_set1_epi8 (x, mx, w);
+ x = _mm512_maskz_set1_epi8 (mx, w);
+ y = _mm256_mask_set1_epi8 (y, my, w);
+ y = _mm256_maskz_set1_epi8 (my, w);
+ z = _mm_mask_set1_epi8 (z, mz, w);
+ z = _mm_maskz_set1_epi8 (mz, w);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastb-2.c
new file mode 100644
index 00000000000..e7f2cab7c41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastb-2.c
@@ -0,0 +1,76 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (char *r, char *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[0];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3;
+ UNION_TYPE (128, i_b) src;
+ MASK_TYPE mask = SIZE | 123;
+ char res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 16; i++)
+ {
+ src.a[i] = 1 + 3 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ CALC (res_ref, src.a);
+
+ if (AVX512F_LEN == 512)
+ {
+ res1.x = INTRINSIC (_broadcastb_epi8) (src.x);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+ }
+
+ res2.x = INTRINSIC (_mask_broadcastb_epi8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcastb_epi8) (mask, src.x);
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+
+ CALC (res_ref, src.a);
+
+ if (AVX512F_LEN == 512)
+ {
+ res1.x = INTRINSIC (_set1_epi8) (src.a[0]);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+ }
+
+ res2.x = INTRINSIC (_mask_set1_epi8) (res2.x, mask, src.a[0]);
+ res3.x = INTRINSIC (_maskz_set1_epi8) (mask, src.a[0]);
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastw-1.c
new file mode 100644
index 00000000000..5255f4f0757
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastw-1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile short w;
+volatile __mmask32 mx;
+volatile __mmask16 my;
+volatile __mmask8 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_broadcastw_epi16 (z);
+ x = _mm512_mask_broadcastw_epi16 (x, mx, z);
+ x = _mm512_maskz_broadcastw_epi16 (mx, z);
+ y = _mm256_mask_broadcastw_epi16 (y, my, z);
+ y = _mm256_maskz_broadcastw_epi16 (my, z);
+ z = _mm_mask_broadcastw_epi16 (z, mz, z);
+ z = _mm_maskz_broadcastw_epi16 (mz, z);
+
+ x = _mm512_set1_epi16 (w);
+ x = _mm512_mask_set1_epi16 (x, mx, w);
+ x = _mm512_maskz_set1_epi16 (mx, w);
+ y = _mm256_mask_set1_epi16 (y, my, w);
+ y = _mm256_maskz_set1_epi16 (my, w);
+ z = _mm_mask_set1_epi16 (z, mz, w);
+ z = _mm_maskz_set1_epi16 (mz, w);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastw-2.c
new file mode 100644
index 00000000000..238d358f87a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpbroadcastw-2.c
@@ -0,0 +1,76 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (short *r, short *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[0];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
+ UNION_TYPE (128, i_w) src;
+ MASK_TYPE mask = SIZE | 123;
+ short res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 1 + 3 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ CALC (res_ref, src.a);
+
+ if (AVX512F_LEN == 512)
+ {
+ res1.x = INTRINSIC (_broadcastw_epi16) (src.x);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+ }
+
+ res2.x = INTRINSIC (_mask_broadcastw_epi16) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcastw_epi16) (mask, src.x);
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+
+ CALC (res_ref, src.a);
+
+ if (AVX512F_LEN == 512)
+ {
+ res1.x = INTRINSIC (_set1_epi16) (src.a[0]);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+ }
+
+ res2.x = INTRINSIC (_mask_set1_epi16) (res2.x, mask, src.a[0]);
+ res3.x = INTRINSIC (_maskz_set1_epi16) (mask, src.a[0]);
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-1.c
new file mode 100644
index 00000000000..6a76a7cd5ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler "vpcmpb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m512i xq;
+volatile __m256i x;
+volatile __m128i xx;
+volatile __mmask16 m;
+volatile __mmask32 mm;
+volatile __mmask64 mmm;
+
+void extern
+avx512bw_test (void)
+{
+ mmm = _mm512_cmp_epi8_mask (xq, xq, _MM_CMPINT_GE);
+ mmm = _mm512_mask_cmp_epi8_mask (m, xq, xq, _MM_CMPINT_NLE);
+ mm = _mm256_cmp_epi8_mask (x, x, _MM_CMPINT_GT);
+ mm = _mm256_mask_cmp_epi8_mask (m, x, x, _MM_CMPINT_EQ);
+ m = _mm_cmp_epi8_mask (xx, xx, _MM_CMPINT_LT);
+ m = _mm_mask_cmp_epi8_mask (m, xx, xx, _MM_CMPINT_LE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-2.c
new file mode 100644
index 00000000000..8b0c541a902
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-2.c
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+#if AVX512F_LEN == 512
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 64; i++) \
+ { \
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
+ } \
+ source1.x = _mm512_loadu_si512 (s1); \
+ source2.x = _mm512_loadu_si512 (s2); \
+ dst1 = _mm512_cmp_epi8_mask (source1.x, source2.x, imm);\
+ dst2 = _mm512_mask_cmp_epi8_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 256
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 32; i++) \
+ { \
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
+ } \
+ source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
+ source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
+ dst1 = _mm256_cmp_epi8_mask (source1.x, source2.x, imm);\
+ dst2 = _mm256_mask_cmp_epi8_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 128
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 16; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm_loadu_si128 ((__m128i*)s1); \
+ source2.x = _mm_loadu_si128 ((__m128i*)s2); \
+ dst1 = _mm_cmp_epi8_mask (source1.x, source2.x, imm);\
+ dst2 = _mm_mask_cmp_epi8_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+void
+TEST ()
+{
+ char s1[64] = {34, 78, 53, 64,
+ 1, 57, 11, 231,
+ 14, 45, 71, 75,
+ 55, 66, 21, 73,
+ 34, 68, 3, 56,
+ 1, 57, 111, 241,
+ 14, 15, 61, 75,
+ 55, 16, 52, 3,
+ 34, 78, 53, 64,
+ 1, 57, 11, 231,
+ 14, 45, 71, 75,
+ 45, 66, 21, 73,
+ 34, 68, 3, 56,
+ 1, 57, 111, 241,
+ 14, 15, 61, 75,
+ 55, 16, 52, 3};
+ char s2[64] = {4, 68, 86, 8,
+ 1, 46, 1, 1,
+ 45, 67, 36, 3,
+ 4, 39, 56, 56,
+ 124, 78, 53, 56,
+ 1, 46, 1, 12,
+ 45, 47, 36, 13,
+ 4, 35, 56, 67,
+ 4, 68, 86, 8,
+ 1, 46, 1, 1,
+ 45, 67, 36, 3,
+ 4, 39, 56, 56,
+ 124, 78, 53, 56,
+ 1, 46, 1, 12,
+ 45, 47, 36, 13,
+ 4, 35, 56, 67};
+ UNION_TYPE (AVX512F_LEN, i_b) source1, source2;
+ MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ CMP(0x00, s1[i] == s2[i]);
+ CMP(0x01, s1[i] < s2[i]);
+ CMP(0x02, s1[i] <= s2[i]);
+ CMP(0x03, 0);
+ CMP(0x04, s1[i] != s2[i]);
+ CMP(0x05, s1[i] >= s2[i]);
+ CMP(0x06, s1[i] > s2[i]);
+ CMP(0x07, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqb-1.c
new file mode 100644
index 00000000000..732a951a4b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqb-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpcmpeqb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]\[\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpeqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\[\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpeqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\[\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpeqb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpeqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpeqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+volatile __mmask64 m64;
+
+void extern
+avx512bw_test (void)
+{
+ m16 = _mm_cmpeq_epi8_mask (x128, x128);
+ m32 = _mm256_cmpeq_epi8_mask (x256, x256);
+ m64 = _mm512_cmpeq_epi8_mask (x512, x512);
+ m16 = _mm_mask_cmpeq_epi8_mask (3, x128, x128);
+ m32 = _mm256_mask_cmpeq_epi8_mask (3, x256, x256);
+ m64 = _mm512_mask_cmpeq_epi8_mask (3, x512, x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqb-2.c
new file mode 100644
index 00000000000..aaef6880631
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqb-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *r, char *s1, char *s2)
+{
+ int i;
+ *r = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] == s2[i])
+ *r = *r | (one << i);
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_b) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+ res1 = 0;
+ res2 = 0;
+
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ src1.a[i * 2] = i;
+ src1.a[i * 2 + 1] = i * i;
+ src2.a[i * 2] = 2 * i;
+ src2.a[i * 2 + 1] = i * i;
+ }
+
+ res1 = INTRINSIC (_cmpeq_epi8_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_cmpeq_epi8_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res_ref != res1)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res_ref != res2)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqw-1.c
new file mode 100644
index 00000000000..b77203bfb3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqw-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpcmpeqw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpeqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpeqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpeqw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpeqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpeqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m8;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+
+void extern
+avx512bw_test (void)
+{
+ m8 = _mm_cmpeq_epi16_mask (x128, x128);
+ m8 = _mm_mask_cmpeq_epi16_mask (3, x128, x128);
+ m16 = _mm256_cmpeq_epi16_mask (x256, x256);
+ m16 = _mm256_mask_cmpeq_epi16_mask (3, x256, x256);
+ m32 = _mm512_mask_cmpeq_epi16_mask (3, x512, x512);
+ m32 = _mm512_cmpeq_epi16_mask (x512, x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqw-2.c
new file mode 100644
index 00000000000..6103e40760a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpeqw-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *r, short *s1, short *s2)
+{
+ int i;
+ *r = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] == s2[i])
+ *r = *r | (one << i);
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ src1.a[i * 2] = i;
+ src1.a[i * 2 + 1] = i * i;
+ src2.a[i * 2] = 2 * i;
+ src2.a[i * 2 + 1] = i * i;
+ }
+
+ res1 = INTRINSIC (_cmpeq_epi16_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_cmpeq_epi16_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res_ref != res1)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res_ref != res2)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtb-1.c
new file mode 100644
index 00000000000..1b54de5ec9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtb-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpcmpgtb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpgtb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpgtb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpgtb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpgtb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpgtb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+volatile __mmask64 m64;
+
+void extern
+avx512bw_test (void)
+{
+ m16 = _mm_cmpgt_epi8_mask (x128, x128);
+ m32 = _mm256_cmpgt_epi8_mask (x256, x256);
+ m64 = _mm512_cmpgt_epi8_mask (x512, x512);
+ m16 = _mm_mask_cmpgt_epi8_mask (3, x128, x128);
+ m32 = _mm256_mask_cmpgt_epi8_mask (3, x256, x256);
+ m64 = _mm512_mask_cmpgt_epi8_mask (3, x512, x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtb-2.c
new file mode 100644
index 00000000000..1647a17a7bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtb-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *r, char *s1, char *s2)
+{
+ int i;
+ *r = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] > s2[i])
+ *r = *r | (one << i);
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_b) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ src1.a[i * 2] = i;
+ src1.a[i * 2 + 1] = i * i;
+ src2.a[i * 2] = 2 * i;
+ src2.a[i * 2 + 1] = i * i;
+ }
+
+ res1 = INTRINSIC (_cmpgt_epi8_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_cmpgt_epi8_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res_ref != res1)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res_ref != res2)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtw-1.c
new file mode 100644
index 00000000000..0f2671345fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtw-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpcmpgtw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpgtw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpgtw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpgtw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpgtw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpgtw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m8;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+
+void extern
+avx512bw_test (void)
+{
+ m8 = _mm_cmpgt_epi16_mask (x128, x128);
+ m16 = _mm256_cmpgt_epi16_mask (x256, x256);
+ m32 = _mm512_cmpgt_epi16_mask (x512, x512);
+ m8 = _mm_mask_cmpgt_epi16_mask (3, x128, x128);
+ m16 = _mm256_mask_cmpgt_epi16_mask (3, x256, x256);
+ m32 = _mm512_mask_cmpgt_epi16_mask (3, x512, x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtw-2.c
new file mode 100644
index 00000000000..2c6e8fabf4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpgtw-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *r, short *s1, short *s2)
+{
+ int i;
+ *r = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] > s2[i])
+ *r = *r | (one << i);
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE / 2; i++)
+ {
+ src1.a[i * 2] = i;
+ src1.a[i * 2 + 1] = i * i;
+ src2.a[i * 2] = 2 * i;
+ src2.a[i * 2 + 1] = i * i;
+ }
+
+ res1 = INTRINSIC (_cmpgt_epi16_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_cmpgt_epi16_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res_ref != res1)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res_ref != res2)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-1.c
new file mode 100644
index 00000000000..32117ac721e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler "vpcmpub\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpub\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpub\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpub\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpub\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpub\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m512i xq;
+volatile __m256i x;
+volatile __m128i xx;
+volatile __mmask16 m;
+volatile __mmask32 mm;
+volatile __mmask64 mmm;
+
+void extern
+avx512bw_test (void)
+{
+ mmm = _mm512_cmp_epu8_mask (xq, xq, _MM_CMPINT_EQ);
+ mmm = _mm512_mask_cmp_epu8_mask (m, xq, xq, _MM_CMPINT_LT);
+ mm = _mm256_cmp_epu8_mask (x, x, _MM_CMPINT_LE);
+ mm = _mm256_mask_cmp_epu8_mask (m, x, x, _MM_CMPINT_UNUSED);
+ m = _mm_cmp_epu8_mask (xx, xx, _MM_CMPINT_NE);
+ m = _mm_mask_cmp_epu8_mask (m, xx, xx, _MM_CMPINT_NLT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-2.c
new file mode 100644
index 00000000000..be288c9b3e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-2.c
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+#if AVX512F_LEN == 512
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 64; i++) \
+ { \
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
+ } \
+ source1.x = _mm512_loadu_si512 (s1); \
+ source2.x = _mm512_loadu_si512 (s2); \
+ dst1 = _mm512_cmp_epu8_mask (source1.x, source2.x, imm);\
+ dst2 = _mm512_mask_cmp_epu8_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 256
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 32; i++) \
+ { \
+      dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;	\
+ } \
+ source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
+ source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
+ dst1 = _mm256_cmp_epu8_mask (source1.x, source2.x, imm);\
+ dst2 = _mm256_mask_cmp_epu8_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 128
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 16; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm_loadu_si128 ((__m128i*)s1); \
+ source2.x = _mm_loadu_si128 ((__m128i*)s2); \
+ dst1 = _mm_cmp_epu8_mask (source1.x, source2.x, imm);\
+ dst2 = _mm_mask_cmp_epu8_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+void
+TEST ()
+{
+ unsigned char s1[64] = {34, 78, 53, 64,
+ 1, 57, 11, 231,
+ 14, 45, 71, 75,
+ 55, 66, 21, 73,
+ 34, 68, 3, 56,
+ 1, 57, 111, 241,
+ 14, 15, 61, 75,
+ 55, 16, 52, 3,
+ 34, 78, 53, 64,
+ 1, 57, 11, 231,
+ 14, 45, 71, 75,
+ 55, 66, 21, 73,
+ 34, 68, 3, 56,
+ 1, 57, 111, 241,
+ 14, 15, 61, 75,
+ 55, 16, 52, 3};
+ unsigned char s2[64] = {4, 68, 86, 8,
+ 1, 46, 1, 1,
+ 45, 67, 36, 3,
+ 4, 39, 56, 56,
+ 124, 78, 53, 56,
+ 1, 46, 1, 12,
+ 45, 47, 36, 13,
+ 4, 35, 56, 67,
+ 4, 68, 86, 8,
+ 1, 46, 1, 1,
+ 45, 67, 36, 3,
+ 4, 39, 56, 56,
+ 124, 78, 53, 56,
+ 1, 46, 1, 12,
+ 45, 47, 36, 13,
+ 4, 35, 56, 67};
+ UNION_TYPE (AVX512F_LEN, i_b) source1, source2;
+ MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ CMP(0x00, s1[i] == s2[i]);
+ CMP(0x01, s1[i] < s2[i]);
+ CMP(0x02, s1[i] <= s2[i]);
+ CMP(0x03, 0);
+ CMP(0x04, s1[i] != s2[i]);
+ CMP(0x05, s1[i] >= s2[i]);
+ CMP(0x06, s1[i] > s2[i]);
+ CMP(0x07, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpuw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpuw-1.c
new file mode 100644
index 00000000000..916f01b0fab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpuw-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler "vpcmpuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpuw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpuw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpuw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpuw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m512i xq;
+volatile __m256i x;
+volatile __m128i xx;
+volatile __mmask8 m;
+volatile __mmask16 mm;
+volatile __mmask32 mmm;
+
+void extern
+avx512bw_test (void)
+{
+ mmm = _mm512_cmp_epu16_mask (xq, xq, _MM_CMPINT_NE);
+ mmm = _mm512_mask_cmp_epu16_mask (m, xq, xq, _MM_CMPINT_NLT);
+ mm = _mm256_cmp_epu16_mask (x, x, _MM_CMPINT_GE);
+ mm = _mm256_mask_cmp_epu16_mask (m, x, x, _MM_CMPINT_NLE);
+ m = _mm_cmp_epu16_mask (xx, xx, _MM_CMPINT_GT);
+ m = _mm_mask_cmp_epu16_mask (m, xx, xx, _MM_CMPINT_EQ);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpuw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpuw-2.c
new file mode 100644
index 00000000000..587030535af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpuw-2.c
@@ -0,0 +1,91 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+#if AVX512F_LEN == 512
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 32; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm512_loadu_si512 (s1); \
+ source2.x = _mm512_loadu_si512 (s2); \
+ dst1 = _mm512_cmp_epu16_mask (source1.x, source2.x, imm);\
+ dst2 = _mm512_mask_cmp_epu16_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 256
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 16; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
+ source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
+ dst1 = _mm256_cmp_epu16_mask (source1.x, source2.x, imm);\
+ dst2 = _mm256_mask_cmp_epu16_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 128
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 8; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm_loadu_si128 ((__m128i*)s1); \
+ source2.x = _mm_loadu_si128 ((__m128i*)s2); \
+ dst1 = _mm_cmp_epu16_mask (source1.x, source2.x, imm);\
+ dst2 = _mm_mask_cmp_epu16_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+void
+TEST ()
+{
+ unsigned short s1[32] = {2134, 6678, 453, 54646,
+ 231, 5674, 111, 23241,
+ 12314, 145, 671, 7575,
+ 23455, 166, 5321, 5673,
+ 2134, 6678, 453, 54646,
+ 231, 5674, 111, 23241,
+ 12314, 145, 671, 7575,
+ 23455, 166, 5321, 5673};
+ unsigned short s2[32] = {41124, 6678, 8653, 856,
+ 231, 4646, 111, 124,
+ 2745, 4567, 3676, 123,
+ 714, 3589, 5683, 5673,
+ 41124, 6678, 8653, 856,
+ 231, 4646, 111, 124,
+ 2745, 4567, 3676, 123,
+ 714, 3589, 5683, 5673};
+ UNION_TYPE (AVX512F_LEN, i_w) source1, source2;
+ MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ CMP(0x00, s1[i] == s2[i]);
+ CMP(0x01, s1[i] < s2[i]);
+ CMP(0x02, s1[i] <= s2[i]);
+ CMP(0x03, 0);
+ CMP(0x04, s1[i] != s2[i]);
+ CMP(0x05, s1[i] >= s2[i]);
+ CMP(0x06, s1[i] > s2[i]);
+ CMP(0x07, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpw-1.c
new file mode 100644
index 00000000000..a506dc12e0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpw-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler "vpcmpw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m512i xq;
+volatile __m256i x;
+volatile __m128i xx;
+volatile __mmask8 m;
+volatile __mmask16 mm;
+volatile __mmask32 mmm;
+
+void extern
+avx512bw_test (void)
+{
+ mmm = _mm512_cmp_epi16_mask (xq, xq, _MM_CMPINT_GT);
+ mmm = _mm512_mask_cmp_epi16_mask (m, xq, xq, _MM_CMPINT_EQ);
+ mm = _mm256_cmp_epi16_mask (x, x, _MM_CMPINT_EQ);
+ mm = _mm256_mask_cmp_epi16_mask (m, x, x, _MM_CMPINT_LT);
+ m = _mm_cmp_epi16_mask (xx, xx, _MM_CMPINT_LE);
+ m = _mm_mask_cmp_epi16_mask (m, xx, xx, _MM_CMPINT_UNUSED);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpw-2.c
new file mode 100644
index 00000000000..54c3588238b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpcmpw-2.c
@@ -0,0 +1,91 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+#if AVX512F_LEN == 512
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 32; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm512_loadu_si512 (s1); \
+ source2.x = _mm512_loadu_si512 (s2); \
+ dst1 = _mm512_cmp_epi16_mask (source1.x, source2.x, imm);\
+ dst2 = _mm512_mask_cmp_epi16_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 256
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 16; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
+ source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
+ dst1 = _mm256_cmp_epi16_mask (source1.x, source2.x, imm);\
+ dst2 = _mm256_mask_cmp_epi16_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 128
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 8; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm_loadu_si128 ((__m128i*)s1); \
+ source2.x = _mm_loadu_si128 ((__m128i*)s2); \
+ dst1 = _mm_cmp_epi16_mask (source1.x, source2.x, imm);\
+ dst2 = _mm_mask_cmp_epi16_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+void
+TEST ()
+{
+ short s1[32] = {2134, 6678, 453, 54646,
+ 231, 5674, 111, 23241,
+ 12314, 145, 671, 7575,
+ 23455, 166, 5321, 5673,
+ 2134, 6678, 453, 54646,
+ 231, 5674, 111, 23241,
+ 12314, 145, 671, 7575,
+ 23455, 166, 5321, 5673};
+ short s2[32] = {41124, 6678, 8653, 856,
+ 231, 4646, 111, 124,
+ 2745, 4567, 3676, 123,
+ 714, 3589, 5683, 5673,
+ 41124, 6678, 8653, 856,
+ 231, 4646, 111, 124,
+ 2745, 4567, 3676, 123,
+ 714, 3589, 5683, 5673};
+ UNION_TYPE (AVX512F_LEN, i_w) source1, source2;
+ MASK_TYPE dst1, dst2, dst_ref;
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ CMP(0x00, s1[i] == s2[i]);
+ CMP(0x01, s1[i] < s2[i]);
+ CMP(0x02, s1[i] <= s2[i]);
+ CMP(0x03, 0);
+ CMP(0x04, s1[i] != s2[i]);
+ CMP(0x05, s1[i] >= s2[i]);
+ CMP(0x06, s1[i] > s2[i]);
+ CMP(0x07, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpermi2w-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpermi2w-1.c
new file mode 100644
index 00000000000..275fef47c2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpermi2w-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermi2w\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermi2w\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermi2w\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x3;
+volatile __m256i x2;
+volatile __m128i x1;
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m3;
+volatile __mmask16 m2;
+volatile __mmask8 m1;
+
+void extern
+avx512bw_test (void)
+{
+ x3 = _mm512_mask2_permutex2var_epi16 (x3, z, m3, x3);
+ x2 = _mm256_mask2_permutex2var_epi16 (x2, y, m2, x2);
+ x1 = _mm_mask2_permutex2var_epi16 (x1, x, m1, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpermi2w-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpermi2w-2.c
new file mode 100644
index 00000000000..52d7ac274fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpermi2w-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "math.h"
+#include "values.h"
+#include "avx512f-mask-type.h"
+
+#define NUM 32
+
+void
+CALC (short *dst, short *src1, short *ind, short *src2)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ unsigned long long offset = ind[i] & (SIZE - 1);
+ unsigned long long cond = ind[i] & SIZE;
+
+ dst[i] = cond ? src2[offset] : src1[offset];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, j;
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res, ind;
+ short res_ref[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < NUM; i++)
+ {
+ for (j = 0; j < SIZE; j++)
+ {
+ ind.a[j] = DEFAULT_VALUE;
+ s1.a[j] = i * 2 * j + 1;
+ s2.a[j] = i * 2 * j;
+
+ res.a[j] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, s1.a, ind.a, s2.a);
+
+ res.x =
+ INTRINSIC (_mask2_permutex2var_epi16) (s1.x, ind.x, mask,
+ s2.x);
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res, res_ref))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c
new file mode 100644
index 00000000000..1596695251c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2w\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x3;
+volatile __m256i x2;
+volatile __m128i x1;
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m3;
+volatile __mmask16 m2;
+volatile __mmask8 m1;
+
+void extern
+avx512bw_test (void)
+{
+ x3 = _mm512_permutex2var_epi16 (x3, z, x3);
+ x3 = _mm512_mask_permutex2var_epi16 (x3, m3, z, x3);
+ x3 = _mm512_maskz_permutex2var_epi16 (m3, x3, z, x3);
+ x2 = _mm256_permutex2var_epi16 (x2, y, x2);
+ x2 = _mm256_mask_permutex2var_epi16 (x2, m2, y, x2);
+ x2 = _mm256_maskz_permutex2var_epi16 (m2, x2, y, x2);
+ x1 = _mm_permutex2var_epi16 (x1, x, x1);
+ x1 = _mm_mask_permutex2var_epi16 (x1, m1, x, x1);
+ x1 = _mm_maskz_permutex2var_epi16 (m1, x1, x, x1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-2.c
new file mode 100644
index 00000000000..58d75f4b8ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpermt2w-2.c
@@ -0,0 +1,70 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "math.h"
+#include "values.h"
+#include "avx512f-mask-type.h"
+
+#define NUM 32
+
+void
+CALC (short *dst, short *src1, short *ind, short *src2)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ unsigned long long offset = ind[i] & (SIZE - 1);
+ unsigned long long cond = ind[i] & SIZE;
+
+ dst[i] = cond ? src2[offset] : src1[offset];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, j;
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res1, res2, res3, ind;
+ short res_ref[SIZE];
+
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < NUM; i++)
+ {
+ for (j = 0; j < SIZE; j++)
+ {
+ ind.a[j] = i * (j << 1);
+ s1.a[j] = DEFAULT_VALUE;
+ s2.a[j] = 1.5 * i * 2 * j;
+
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ CALC (res_ref, s1.a, ind.a, s2.a);
+
+ res1.x = INTRINSIC (_permutex2var_epi16) (s1.x, ind.x, s2.x);
+ res2.x =
+ INTRINSIC (_mask_permutex2var_epi16) (s1.x, mask, ind.x, s2.x);
+ res3.x =
+ INTRINSIC (_maskz_permutex2var_epi16) (mask, s1.x, ind.x,
+ s2.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpermw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpermw-1.c
new file mode 100644
index 00000000000..4d8f356ddc8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpermw-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x1;
+volatile __m256i x2;
+volatile __m128i x3;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ x1 = _mm512_permutexvar_epi16 (x1, x1);
+ x1 = _mm512_maskz_permutexvar_epi16 (m1, x1, x1);
+ x1 = _mm512_mask_permutexvar_epi16 (x1, m1, x1, x1);
+ x2 = _mm256_permutexvar_epi16 (x2, x2);
+ x2 = _mm256_maskz_permutexvar_epi16 (m2, x2, x2);
+ x2 = _mm256_mask_permutexvar_epi16 (x2, m2, x2, x2);
+ x3 = _mm_permutexvar_epi16 (x3, x3);
+ x3 = _mm_maskz_permutexvar_epi16 (m3, x3, x3);
+ x3 = _mm_mask_permutexvar_epi16 (x3, m3, x3, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpermw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpermw-2.c
new file mode 100644
index 00000000000..42bedd9f89a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpermw-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *src, short *ind, short *res)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res[i] = src[ind[i] & (SIZE - 1)];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res1, res2, res3;
+ short res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * i * i;
+ s2.a[i] = i + 20;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_permutexvar_epi16) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_permutexvar_epi16) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_permutexvar_epi16) (mask, s1.x, s2.x);
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w)(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w)(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddubsw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddubsw-1.c
new file mode 100644
index 00000000000..f4f41b58df7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddubsw-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmaddubsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaddubsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaddubsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaddubsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaddubsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaddubsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaddubsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i xq;
+volatile __m128i xw;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_maddubs_epi16 (x, x);
+ x = _mm512_mask_maddubs_epi16 (x, 2, x, x);
+ x = _mm512_maskz_maddubs_epi16 (2, x, x);
+ xq = _mm256_mask_maddubs_epi16 (xq, 2, xq, xq);
+ xq = _mm256_maskz_maddubs_epi16 (2, xq, xq);
+ xw = _mm_mask_maddubs_epi16 (xw, 2, xw, xw);
+ xw = _mm_maskz_maddubs_epi16 (2, xw, xw);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddubsw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddubsw-2.c
new file mode 100644
index 00000000000..e3bd83fee8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddubsw-2.c
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#include <values.h>
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *i1, short *i2, short *r)
+{
+ unsigned char *ub1 = (unsigned char *) i1;
+ char *sb2 = (char *) i2;
+ short *sout = (short *) r;
+ int t0;
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ t0 = ((int) ub1[2 * i] * (int) sb2[2 * i] +
+ (int) ub1[2 * i + 1] * (int) sb2[2 * i + 1]);
+ if (t0 > (int) 0x7fff)
+ sout[i] = 0x7fff;
+ else if (t0 < (int) 0xffff8000)
+ sout[i] = 0x8000;
+ else
+ sout[i] = (short) t0;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res1, res2, res3;
+ short res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+ int fail = 0;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * 17 + i;
+ s2.a[i] = i * -17 + i * 2;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_maddubs_epi16) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_maddubs_epi16) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_maddubs_epi16) (mask, s1.x, s2.x);
+
+ CALC(s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-1.c
new file mode 100644
index 00000000000..42e20a6799c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmaddwd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaddwd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaddwd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaddwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaddwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaddwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaddwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i xq;
+volatile __m128i xw;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_madd_epi16 (x, x);
+ x = _mm512_mask_madd_epi16 (x, 2, x, x);
+ x = _mm512_maskz_madd_epi16 (2, x, x);
+ xq = _mm256_mask_madd_epi16 (xq, 2, xq, xq);
+ xq = _mm256_maskz_madd_epi16 (2, xq, xq);
+ xw = _mm_mask_madd_epi16 (xw, 2, xw, xw);
+ xw = _mm_maskz_madd_epi16 (2, xw, xw);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-2.c
new file mode 100644
index 00000000000..fb6ef8e1b94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaddwd-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#include <values.h>
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *i1, short *i2, int *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = ((int) i1[2 * i] * (int) i2[2 * i] +
+ (int) i1[2 * i + 1] * (int) i2[2 * i + 1]);
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ int res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i;
+
+ for (i = 0; i < SIZE * 2; i++)
+ {
+ s1.a[i] = i * 17 + i;
+ s2.a[i] = i * -17 + i * 2;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_madd_epi16) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_madd_epi16) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_madd_epi16) (mask, s1.x, s2.x);
+
+ CALC(s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsb-1.c
new file mode 100644
index 00000000000..ad8d9095a8d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsb-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask32 mx;
+volatile __mmask16 my;
+volatile __mmask8 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_max_epi8 (x, x);
+ x = _mm512_mask_max_epi8 (x, mx, x, x);
+ x = _mm512_maskz_max_epi8 (mx, x, x);
+ y = _mm256_mask_max_epi8 (y, my, y, y);
+ y = _mm256_maskz_max_epi8 (my, y, y);
+ z = _mm_mask_max_epi8 (z, mz, z, z);
+ z = _mm_maskz_max_epi8 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsb-2.c
new file mode 100644
index 00000000000..856d1c4c9b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsb-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+
+#include "avx512f-mask-type.h"
+
+void
+CALC (char *src1, char *src2, char *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+TEST (void)
+{
+  int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_b) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * sign;
+ src2.a[i] = (i + 20) * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_max_epi8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_epi8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_epi8) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsw-1.c
new file mode 100644
index 00000000000..237b9b8b287
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsw-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask32 mx;
+volatile __mmask16 my;
+volatile __mmask8 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_max_epi16 (x, x);
+ x = _mm512_mask_max_epi16 (x, mx, x, x);
+ x = _mm512_maskz_max_epi16 (mx, x, x);
+ y = _mm256_mask_max_epi16 (y, my, y, y);
+ y = _mm256_maskz_max_epi16 (my, y, y);
+ z = _mm_mask_max_epi16 (z, mz, z, z);
+ z = _mm_maskz_max_epi16 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsw-2.c
new file mode 100644
index 00000000000..0469fe53a8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxsw-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *src1, short *src2, short *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+TEST (void)
+{
+  int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * sign;
+ src2.a[i] = (i + 20) * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_max_epi16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_epi16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_epi16) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxub-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxub-1.c
new file mode 100644
index 00000000000..87728891557
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxub-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxub\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxub\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxub\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxub\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxub\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxub\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxub\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask32 mx;
+volatile __mmask16 my;
+volatile __mmask8 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_max_epu8 (x, x);
+ x = _mm512_mask_max_epu8 (x, mx, x, x);
+ x = _mm512_maskz_max_epu8 (mx, x, x);
+ y = _mm256_mask_max_epu8 (y, my, y, y);
+ y = _mm256_maskz_max_epu8 (my, y, y);
+ z = _mm_mask_max_epu8 (z, mz, z, z);
+ z = _mm_maskz_max_epu8 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxub-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxub-2.c
new file mode 100644
index 00000000000..bccf5b8d4e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxub-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+
+#include "avx512f-mask-type.h"
+
+void
+CALC (unsigned char *src1, unsigned char *src2,
+      unsigned char *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_b) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i;
+ src2.a[i] = i + 20;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_max_epu8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_epu8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_epu8) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxuw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxuw-1.c
new file mode 100644
index 00000000000..a0dc29f2293
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxuw-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxuw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxuw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxuw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxuw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask32 mx;
+volatile __mmask16 my;
+volatile __mmask8 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_max_epu16 (x, x);
+ x = _mm512_mask_max_epu16 (x, mx, x, x);
+ x = _mm512_maskz_max_epu16 (mx, x, x);
+ y = _mm256_mask_max_epu16 (y, my, y, y);
+ y = _mm256_maskz_max_epu16 (my, y, y);
+ z = _mm_mask_max_epu16 (z, mz, z, z);
+ z = _mm_maskz_max_epu16 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxuw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxuw-2.c
new file mode 100644
index 00000000000..b9af22cc55c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmaxuw-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+
+#include "avx512f-mask-type.h"
+
+void
+CALC (unsigned short *src1, unsigned short *src2,
+      unsigned short *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i;
+ src2.a[i] = i + 20;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_max_epu16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_max_epu16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_max_epu16) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpminsb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpminsb-1.c
new file mode 100644
index 00000000000..091607373b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpminsb-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpminsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask32 mx;
+volatile __mmask16 my;
+volatile __mmask8 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_min_epi8 (x, x);
+ x = _mm512_mask_min_epi8 (x, mx, x, x);
+ x = _mm512_maskz_min_epi8 (mx, x, x);
+ y = _mm256_mask_min_epi8 (y, my, y, y);
+ y = _mm256_maskz_min_epi8 (my, y, y);
+ z = _mm_mask_min_epi8 (z, mz, z, z);
+ z = _mm_maskz_min_epi8 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpminsb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpminsb-2.c
new file mode 100644
index 00000000000..aa2509cfc9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpminsb-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+
+#include "avx512f-mask-type.h"
+
+CALC (char *src1, char *src2, char *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void
+TEST (void)
+{
+  int i, sign = -1;	/* Alternate sign of test inputs; was uninitialized.  */
+  UNION_TYPE (AVX512F_LEN, i_b) src1, src2, res1, res2, res3;
+  MASK_TYPE mask = MASK_VALUE;
+  char res_ref[SIZE];
+
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = i * sign;
+      src2.a[i] = (i + 20) * sign;
+      sign = -sign;
+      res2.a[i] = DEFAULT_VALUE;
+    }
+
+  res1.x = INTRINSIC (_min_epi8) (src1.x, src2.x);
+  res2.x = INTRINSIC (_mask_min_epi8) (res2.x, mask, src1.x, src2.x);
+  res3.x = INTRINSIC (_maskz_min_epi8) (mask, src1.x, src2.x);
+
+  CALC (src1.a, src2.a, res_ref);
+
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpminsw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpminsw-1.c
new file mode 100644
index 00000000000..e7127520781
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpminsw-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpminsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask32 mx;
+volatile __mmask16 my;
+volatile __mmask8 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_min_epi16 (x, x);
+ x = _mm512_mask_min_epi16 (x, mx, x, x);
+ x = _mm512_maskz_min_epi16 (mx, x, x);
+ y = _mm256_mask_min_epi16 (y, my, y, y);
+ y = _mm256_maskz_min_epi16 (my, y, y);
+ z = _mm_mask_min_epi16 (z, mz, z, z);
+ z = _mm_maskz_min_epi16 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpminsw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpminsw-2.c
new file mode 100644
index 00000000000..1161558751b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpminsw-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+
+#include "avx512f-mask-type.h"
+
+CALC (short *src1, short *src2, short *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void
+TEST (void)
+{
+  int i, sign = -1;	/* Alternate sign of test inputs; was uninitialized.  */
+  UNION_TYPE (AVX512F_LEN, i_w) src1, src2, res1, res2, res3;
+  MASK_TYPE mask = MASK_VALUE;
+  short res_ref[SIZE];
+
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = i * sign;
+      src2.a[i] = (i + 20) * sign;
+      sign = -sign;
+      res2.a[i] = DEFAULT_VALUE;
+    }
+
+  res1.x = INTRINSIC (_min_epi16) (src1.x, src2.x);
+  res2.x = INTRINSIC (_mask_min_epi16) (res2.x, mask, src1.x, src2.x);
+  res3.x = INTRINSIC (_maskz_min_epi16) (mask, src1.x, src2.x);
+
+  CALC (src1.a, src2.a, res_ref);
+
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_w) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_w) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpminub-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpminub-1.c
new file mode 100644
index 00000000000..6e36e344558
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpminub-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpminub\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminub\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminub\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminub\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminub\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminub\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminub\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask32 mx;
+volatile __mmask16 my;
+volatile __mmask8 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_min_epu8 (x, x);
+ x = _mm512_mask_min_epu8 (x, mx, x, x);
+ x = _mm512_maskz_min_epu8 (mx, x, x);
+ y = _mm256_mask_min_epu8 (y, my, y, y);
+ y = _mm256_maskz_min_epu8 (my, y, y);
+ z = _mm_mask_min_epu8 (z, mz, z, z);
+ z = _mm_maskz_min_epu8 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpminub-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpminub-2.c
new file mode 100644
index 00000000000..bbf2e1996eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpminub-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+
+#include "avx512f-mask-type.h"
+
+CALC (unsigned char *src1, unsigned char *src2,
+ unsigned char *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_b) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i;
+ src2.a[i] = i + 20;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_min_epu8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_min_epu8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_min_epu8) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpminuw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpminuw-1.c
new file mode 100644
index 00000000000..4ca15b38abb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpminuw-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpminuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminuw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminuw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminuw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminuw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask32 mx;
+volatile __mmask16 my;
+volatile __mmask8 mz;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_min_epu16 (x, x);
+ x = _mm512_mask_min_epu16 (x, mx, x, x);
+ x = _mm512_maskz_min_epu16 (mx, x, x);
+ y = _mm256_mask_min_epu16 (y, my, y, y);
+ y = _mm256_maskz_min_epu16 (my, y, y);
+ z = _mm_mask_min_epu16 (z, mz, z, z);
+ z = _mm_maskz_min_epu16 (mz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpminuw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpminuw-2.c
new file mode 100644
index 00000000000..9ee63b93776
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpminuw-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+
+#include "avx512f-mask-type.h"
+
+CALC (unsigned short *src1, unsigned short *src2,
+ unsigned short *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i;
+ src2.a[i] = i + 20;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_min_epu16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_min_epu16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_min_epu16) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovb2m-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovb2m-1.c
new file mode 100644
index 00000000000..48284783bef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovb2m-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpmovb2m\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+/* { dg-final { scan-assembler "vpmovb2m\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+/* { dg-final { scan-assembler "vpmovb2m\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask64 m64;
+volatile __mmask32 m32;
+volatile __mmask16 m16;
+
+void extern
+avx512bw_test (void)
+{
+ m16 = _mm_movepi8_mask (x128);
+ m32 = _mm256_movepi8_mask (x256);
+ m64 = _mm512_movepi8_mask (x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovb2m-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovb2m-2.c
new file mode 100644
index 00000000000..5bed63ebd75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovb2m-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *r, char *s1)
+{
+ int i;
+ MASK_TYPE res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] >> 7)
+ res = res | (one << i);
+
+ *r = res;
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_b) src;
+ MASK_TYPE res, res_ref = 0;
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 2 * i * sign;
+ sign = sign * -1;
+ }
+
+ res = INTRINSIC (_movepi8_mask) (src.x);
+
+ CALC (&res_ref, src.a);
+
+ if (res_ref != res)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2b-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2b-1.c
new file mode 100644
index 00000000000..a832479dc4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2b-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpmovm2b\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%xmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vpmovm2b\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%ymm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vpmovm2b\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask64 m64;
+volatile __mmask32 m32;
+volatile __mmask16 m16;
+
+void extern
+avx512bw_test (void)
+{
+ x128 = _mm_movm_epi8 (m16);
+ x256 = _mm256_movm_epi8 (m32);
+ x512 = _mm512_movm_epi8 (m64);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2b-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2b-2.c
new file mode 100644
index 00000000000..e054bf78365
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2b-2.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (char *r, MASK_TYPE s)
+{
+ int i;
+ char all_ones = 0xff;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = ((s >> i) & 1) ? all_ones : 0;
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_b) res, res_ref;
+ MASK_TYPE src = (MASK_TYPE) 0x1111abeffeec1234;
+
+ res.x = INTRINSIC (_movm_epi8) (src);
+
+ CALC (res_ref.a, src);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res, res_ref.a))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2w-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2w-1.c
new file mode 100644
index 00000000000..d356d5e89a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2w-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpmovm2w\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%xmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vpmovm2w\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%ymm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vpmovm2w\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask32 m32;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx512bw_test (void)
+{
+ x128 = _mm_movm_epi16 (m8);
+ x256 = _mm256_movm_epi16 (m16);
+ x512 = _mm512_movm_epi16 (m32);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2w-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2w-2.c
new file mode 100644
index 00000000000..24b0737bf3d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovm2w-2.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (short *r, MASK_TYPE s)
+{
+ int i;
+ short all_ones = 0xffff;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = ((s >> i) & 1) ? all_ones : 0;
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_w) res, res_ref;
+ MASK_TYPE src = (MASK_TYPE) 0x1111abc2;
+
+ res.x = INTRINSIC (_movm_epi16) (src);
+
+ CALC (res_ref.a, src);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res, res_ref.a))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c
new file mode 100644
index 00000000000..ea02205b069
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __m512i u;
+volatile __mmask8 m1;
+volatile __mmask16 m2;
+volatile __mmask32 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm_cvtsepi16_epi8 (x);
+ z = _mm_mask_cvtsepi16_epi8 (z, m1, x);
+ z = _mm_maskz_cvtsepi16_epi8 (m1, x);
+ z = _mm256_cvtsepi16_epi8 (y);
+ z = _mm256_mask_cvtsepi16_epi8 (z, m2, y);
+ z = _mm256_maskz_cvtsepi16_epi8 (m2, y);
+ y = _mm512_cvtsepi16_epi8 (u);
+ y = _mm512_mask_cvtsepi16_epi8 (y, m3, u);
+ y = _mm512_maskz_cvtsepi16_epi8 (m3, u);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c
new file mode 100644
index 00000000000..88d1ee101dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#define SIZE_HALF (AVX512F_LEN_HALF / 8)
+#include "avx512f-mask-type.h"
+#include <limits.h>
+
+CALC (char *r, short *s)
+{
+  int i;
+  for (i = 0; i < SIZE_HALF; i++)
+    {
+      short v = (i < SIZE) ? s[i] : 0;	/* Don't read s[] past SIZE.  */
+      if (v < CHAR_MIN)
+	r[i] = CHAR_MIN;
+      else if (v > CHAR_MAX)
+	r[i] = CHAR_MAX;
+      else
+	r[i] = v;
+    }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_w) src;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[32];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtsepi16_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_cvtsepi16_epi8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtsepi16_epi8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovsxbw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovsxbw-1.c
new file mode 100644
index 00000000000..78be054595e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovsxbw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i res1;
+volatile __m256i s1, res2;
+volatile __m128i s2, res3;
+volatile __mmask32 m32;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx512bw_test (void)
+{
+ res1 = _mm512_cvtepi8_epi16 (s1);
+
+ res1 = _mm512_mask_cvtepi8_epi16 (res1, m32, s1);
+ res2 = _mm256_mask_cvtepi8_epi16 (res2, m16, s2);
+ res3 = _mm_mask_cvtepi8_epi16 (res3, m8, s2);
+
+ res1 = _mm512_maskz_cvtepi8_epi16 (m32, s1);
+ res2 = _mm256_maskz_cvtepi8_epi16 (m16, s2);
+ res3 = _mm_maskz_cvtepi8_epi16 (m8, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovsxbw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovsxbw-2.c
new file mode 100644
index 00000000000..4cc44053068
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovsxbw-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE (AVX512F_LEN_HALF / 8)
+#define DST_SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (char *s, short *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ r[i] = (short) s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) s;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[DST_SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 8 * i * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_cvtepi8_epi16) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepi8_epi16) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepi8_epi16) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c
new file mode 100644
index 00000000000..03fd07bea63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __m512i u;
+volatile __mmask8 m1;
+volatile __mmask16 m2;
+volatile __mmask32 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm_cvtusepi16_epi8 (x);
+ z = _mm_mask_cvtusepi16_epi8 (z, m1, x);
+ z = _mm_maskz_cvtusepi16_epi8 (m1, x);
+ z = _mm256_cvtusepi16_epi8 (y);
+ z = _mm256_mask_cvtusepi16_epi8 (z, m2, y);
+ z = _mm256_maskz_cvtusepi16_epi8 (m2, y);
+ y = _mm512_cvtusepi16_epi8 (u);
+ y = _mm512_mask_cvtusepi16_epi8 (y, m3, u);
+ y = _mm512_maskz_cvtusepi16_epi8 (m3, u);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c
new file mode 100644
index 00000000000..a25dac3d699
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#define SIZE_HALF (AVX512F_LEN_HALF / 8)
+#include "avx512f-mask-type.h"
+#include <limits.h>
+
+CALC (unsigned char *r, unsigned short *s)
+{
+  int i;
+  for (i = 0; i < SIZE_HALF; i++)
+    {
+      unsigned short v = (i < SIZE) ? s[i] : 0;	/* Don't read s[] past SIZE.  */
+      r[i] = (v > UCHAR_MAX) ? UCHAR_MAX : v;
+    }
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_w) src;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[32];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtusepi16_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_cvtusepi16_epi8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtusepi16_epi8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovw2m-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovw2m-1.c
new file mode 100644
index 00000000000..31a64ffc0d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovw2m-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpmovw2m\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+/* { dg-final { scan-assembler "vpmovw2m\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+/* { dg-final { scan-assembler "vpmovw2m\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m8;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+
+void extern
+avx512bw_test (void)
+{
+ m8 = _mm_movepi16_mask (x128);
+ m16 = _mm256_movepi16_mask (x256);
+ m32 = _mm512_movepi16_mask (x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovw2m-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovw2m-2.c
new file mode 100644
index 00000000000..1c8ae02beb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovw2m-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *r, short *s1)
+{
+ int i;
+ MASK_TYPE res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] >> 15)
+ res = res | (one << i);
+
+ *r = res;
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) src;
+ MASK_TYPE res, res_ref = 0;
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 2 * i * sign;
+ sign = sign * -1;
+ }
+
+ res = INTRINSIC (_movepi16_mask) (src.x);
+
+ CALC (&res_ref, src.a);
+
+ if (res_ref != res)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c
new file mode 100644
index 00000000000..115a54747f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __m512i u;
+volatile __mmask8 m1;
+volatile __mmask16 m2;
+volatile __mmask32 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm_cvtepi16_epi8 (x);
+ z = _mm_mask_cvtepi16_epi8 (z, m1, x);
+ z = _mm_maskz_cvtepi16_epi8 (m1, x);
+ z = _mm256_cvtepi16_epi8 (y);
+ z = _mm256_mask_cvtepi16_epi8 (z, m2, y);
+ z = _mm256_maskz_cvtepi16_epi8 (m2, y);
+ y = _mm512_cvtepi16_epi8 (u);
+ y = _mm512_mask_cvtepi16_epi8 (y, m3, u);
+ y = _mm512_maskz_cvtepi16_epi8 (m3, u);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c
new file mode 100644
index 00000000000..e923d6f5b9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#define SIZE_HALF (AVX512F_LEN_HALF / 8)
+#include "avx512f-mask-type.h"
+
+CALC (char *r, short *s)
+{
+ int i;
+ for (i = 0; i < SIZE_HALF; i++)
+ {
+ r[i] = (i < SIZE) ? (char) s[i] : 0;
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN, i_w) src;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[32];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 1 + 34 * i * sign;
+ sign = sign * -1;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepi16_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_cvtepi16_epi8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtepi16_epi8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovzxbw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovzxbw-1.c
new file mode 100644
index 00000000000..691b9b70e2c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovzxbw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i res1;
+volatile __m256i s1, res2;
+volatile __m128i s2, res3;
+volatile __mmask32 m32;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx512bw_test (void)
+{
+ res1 = _mm512_cvtepu8_epi16 (s1);
+
+ res1 = _mm512_mask_cvtepu8_epi16 (res1, m32, s1);
+ res2 = _mm256_mask_cvtepu8_epi16 (res2, m16, s2);
+ res3 = _mm_mask_cvtepu8_epi16 (res3, m8, s2);
+
+ res1 = _mm512_maskz_cvtepu8_epi16 (m32, s1);
+ res2 = _mm256_maskz_cvtepu8_epi16 (m16, s2);
+ res3 = _mm_maskz_cvtepu8_epi16 (m8, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovzxbw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovzxbw-2.c
new file mode 100644
index 00000000000..7048147e743
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovzxbw-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SRC_SIZE (AVX512F_LEN_HALF / 8)
+#define DST_SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (unsigned char *s, short *r)
+{
+ int i;
+
+ for (i = 0; i < DST_SIZE; i++)
+ {
+ r[i] = s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN_HALF, i_b) s;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[DST_SIZE];
+ int i;
+
+ for (i = 0; i < SRC_SIZE; i++)
+ {
+ s.a[i] = 16 * i;
+ }
+
+ for (i = 0; i < DST_SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_cvtepu8_epi16) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepu8_epi16) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepu8_epi16) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, DST_SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhrsw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhrsw-1.c
new file mode 100644
index 00000000000..066da24dfae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhrsw-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmulhrsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmulhrsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhrsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhrsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhrsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhrsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhrsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+volatile __m256i xq, yq, zq;
+volatile __m128i xw, yw, zw;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_mulhrs_epi16 (y, z);
+ x = _mm512_mask_mulhrs_epi16 (x, 2, y, z);
+ x = _mm512_maskz_mulhrs_epi16 (2, y, z);
+ xq = _mm256_mask_mulhrs_epi16 (xq, 2, yq, zq);
+ xq = _mm256_maskz_mulhrs_epi16 (2, yq, zq);
+ xw = _mm_mask_mulhrs_epi16 (xw, 2, yw, zw);
+ xw = _mm_maskz_mulhrs_epi16 (2, yw, zw);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhrsw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhrsw-2.c
new file mode 100644
index 00000000000..4ae2f36f73b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhrsw-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *src1, short *src2, short *dst)
+{
+ int i, t0;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ t0 = (((int) src1[i] * (int) src2[i]) >> 14) + 1;
+ dst[i] = (short) (t0 >> 1);
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2, dst1, dst2, dst3;
+ short dst_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i, sign = -1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i % 2;
+ src2.a[i] = i * sign;
+ dst2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ dst1.x = INTRINSIC (_mulhrs_epi16) (src1.x, src2.x);
+ dst2.x =
+ INTRINSIC (_mask_mulhrs_epi16) (dst2.x, mask, src1.x, src2.x);
+ dst3.x = INTRINSIC (_maskz_mulhrs_epi16) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst2, dst_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst3, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhuw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhuw-1.c
new file mode 100644
index 00000000000..4ffda936549
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhuw-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmulhuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmulhuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhuw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhuw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhuw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhuw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhuw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+volatile __m256i xq, yq, zq;
+volatile __m128i xw, yw, zw;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_mulhi_epu16 (y, z);
+ x = _mm512_mask_mulhi_epu16 (x, 2, y, z);
+ x = _mm512_maskz_mulhi_epu16 (2, y, z);
+ xq = _mm256_mask_mulhi_epu16 (xq, 2, yq, zq);
+ xq = _mm256_maskz_mulhi_epu16 (2, yq, zq);
+ xw = _mm_mask_mulhi_epu16 (xw, 2, yw, zw);
+ xw = _mm_maskz_mulhi_epu16 (2, yw, zw);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhuw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhuw-2.c
new file mode 100644
index 00000000000..512940a2765
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhuw-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *src1, short *src2, short *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+    dst[i] = ((unsigned int) (unsigned short) src1[i]
+	      * (unsigned short) src2[i]) >> 16;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2, dst1, dst2, dst3;
+ short dst_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i, sign = -1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i % 2;
+ src2.a[i] = i * sign;
+ dst2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ dst1.x = INTRINSIC (_mulhi_epu16) (src1.x, src2.x);
+ dst2.x =
+ INTRINSIC (_mask_mulhi_epu16) (dst2.x, mask, src1.x, src2.x);
+ dst3.x = INTRINSIC (_maskz_mulhi_epu16) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst2, dst_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst3, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhw-1.c
new file mode 100644
index 00000000000..70c2f561bda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhw-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmulhw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmulhw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulhw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+volatile __m256i xq, yq, zq;
+volatile __m128i xw, yw, zw;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_mulhi_epi16 (y, z);
+ x = _mm512_mask_mulhi_epi16 (x, 2, y, z);
+ x = _mm512_maskz_mulhi_epi16 (2, y, z);
+ xq = _mm256_mask_mulhi_epi16 (xq, 2, yq, zq);
+ xq = _mm256_maskz_mulhi_epi16 (2, yq, zq);
+ xw = _mm_mask_mulhi_epi16 (xw, 2, yw, zw);
+ xw = _mm_maskz_mulhi_epi16 (2, yw, zw);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhw-2.c
new file mode 100644
index 00000000000..d87932d304e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmulhw-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *src1, short *src2, short *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = (src1[i] * src2[i]) >> 16;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2, dst1, dst2, dst3;
+ short dst_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i, sign = -1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i % 2;
+ src2.a[i] = i * sign;
+ dst2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ dst1.x = INTRINSIC (_mulhi_epi16) (src1.x, src2.x);
+ dst2.x =
+ INTRINSIC (_mask_mulhi_epi16) (dst2.x, mask, src1.x, src2.x);
+ dst3.x = INTRINSIC (_maskz_mulhi_epi16) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst2, dst_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst3, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmullw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmullw-1.c
new file mode 100644
index 00000000000..1d27e88a936
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmullw-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmullw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmullw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+volatile __m256i xq, yq, zq;
+volatile __m128i xw, yw, zw;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_mullo_epi16 (y, z);
+ x = _mm512_mask_mullo_epi16 (x, 3, y, z);
+ x = _mm512_maskz_mullo_epi16 (3, y, z);
+ xq = _mm256_mask_mullo_epi16 (xq, 3, yq, zq);
+ xq = _mm256_maskz_mullo_epi16 (3, yq, zq);
+ xw = _mm_mask_mullo_epi16 (xw, 3, yw, zw);
+ xw = _mm_maskz_mullo_epi16 (3, yw, zw);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmullw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmullw-2.c
new file mode 100644
index 00000000000..603882330b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmullw-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *src1, short *src2, short *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = (short) ((int) src1[i] * (int) src2[i]);
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2, dst1, dst2, dst3;
+ short dst_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * sign;
+ src2.a[i] = i + 20 * sign;
+ sign = -sign;
+ dst2.a[i] = DEFAULT_VALUE;
+ }
+
+ dst1.x = INTRINSIC (_mullo_epi16) (src1.x, src2.x);
+ dst2.x = INTRINSIC (_mask_mullo_epi16) (dst2.x, mask, src1.x, src2.x);
+ dst3.x = INTRINSIC (_maskz_mullo_epi16) (mask, src1.x, src2.x);
+
+ CALC (src1.a, src2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst2, dst_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (dst3, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpshufb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpshufb-1.c
new file mode 100644
index 00000000000..200ec8957f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpshufb-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpshufb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask64 m1;
+volatile __mmask32 m2;
+volatile __mmask16 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_shuffle_epi8 (z, z);
+ z = _mm512_mask_shuffle_epi8 (z, m1, z, z);
+ z = _mm512_maskz_shuffle_epi8 (m1, z, z);
+ y = _mm256_mask_shuffle_epi8 (y, m2, y, y);
+ y = _mm256_maskz_shuffle_epi8 (m2, y, y);
+ x = _mm_mask_shuffle_epi8 (x, m3, x, x);
+ x = _mm_maskz_shuffle_epi8 (m3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpshufb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpshufb-2.c
new file mode 100644
index 00000000000..6b43dcfb744
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpshufb-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+void
+CALC (char *s1, char *s2, char *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ if (s2[i] < 0)
+ r[i] = 0;
+ else
+ r[i] = s1[(s2[i] & 0xf) + 16 * (i / 16)];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_b) s1, s2, res1, res2, res3;
+ char res_ref[SIZE];
+ int i, sign = 1;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * i * sign;
+ s2.a[i] = 179 - i;
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_shuffle_epi8) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_shuffle_epi8) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_shuffle_epi8) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpshufhw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpshufhw-1.c
new file mode 100644
index 00000000000..66de86451dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpshufhw-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpshufhw\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpshufhw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufhw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufhw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufhw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufhw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufhw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_shufflehi_epi16 (z, _MM_PERM_AADB);
+ z = _mm512_mask_shufflehi_epi16 (z, m1, z, _MM_PERM_AADB);
+ z = _mm512_maskz_shufflehi_epi16 (m1, z, _MM_PERM_AADB);
+ y = _mm256_mask_shufflehi_epi16 (y, m2, y, _MM_PERM_AADB);
+ y = _mm256_maskz_shufflehi_epi16 (m2, y, _MM_PERM_AADB);
+ x = _mm_mask_shufflehi_epi16 (x, m3, x, _MM_PERM_AADB);
+ x = _mm_maskz_shufflehi_epi16 (m3, x, _MM_PERM_AADB);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpshufhw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpshufhw-2.c
new file mode 100644
index 00000000000..4043217ba7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpshufhw-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *s, unsigned char imm, short *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE / 8; i++)
+ {
+ r[8 * i] = s[8 * i];
+ r[8 * i + 1] = s[8 * i + 1];
+ r[8 * i + 2] = s[8 * i + 2];
+ r[8 * i + 3] = s[8 * i + 3];
+ r[8 * i + 4] = s[8 * i + (imm >> 0 & 3) + 4];
+ r[8 * i + 5] = s[8 * i + (imm >> 2 & 3) + 4];
+ r[8 * i + 6] = s[8 * i + (imm >> 4 & 3) + 4];
+ r[8 * i + 7] = s[8 * i + (imm >> 6 & 3) + 4];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, res1, res2, res3;
+ short res_ref[SIZE];
+ int i, sign = 1;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * i * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_shufflehi_epi16) (s1.x, 0xec);
+ res2.x =
+ INTRINSIC (_mask_shufflehi_epi16) (res2.x, mask, s1.x, 0xec);
+ res3.x = INTRINSIC (_maskz_shufflehi_epi16) (mask, s1.x, 0xec);
+
+ CALC (s1.a, 0xec, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpshuflw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpshuflw-1.c
new file mode 100644
index 00000000000..1b18f99c0c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpshuflw-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpshuflw\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpshuflw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshuflw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpshuflw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshuflw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpshuflw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshuflw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_shufflelo_epi16 (z, _MM_PERM_AADB);
+ z = _mm512_mask_shufflelo_epi16 (z, m1, z, _MM_PERM_AADB);
+ z = _mm512_maskz_shufflelo_epi16 (m1, z, _MM_PERM_AADB);
+ y = _mm256_mask_shufflelo_epi16 (y, m2, y, _MM_PERM_AADB);
+ y = _mm256_maskz_shufflelo_epi16 (m2, y, _MM_PERM_AADB);
+ x = _mm_mask_shufflelo_epi16 (x, m3, x, _MM_PERM_AADB);
+ x = _mm_maskz_shufflelo_epi16 (m3, x, _MM_PERM_AADB);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpshuflw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpshuflw-2.c
new file mode 100644
index 00000000000..72dda61c3c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpshuflw-2.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *s, unsigned char imm, short *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE / 8; i++)
+ {
+ r[8 * i] = s[8 * i + (imm >> 0 & 3)];
+ r[8 * i + 1] = s[8 * i + (imm >> 2 & 3)];
+ r[8 * i + 2] = s[8 * i + (imm >> 4 & 3)];
+ r[8 * i + 3] = s[8 * i + (imm >> 6 & 3)];
+ r[8 * i + 4] = s[8 * i + 4];
+ r[8 * i + 5] = s[8 * i + 5];
+ r[8 * i + 6] = s[8 * i + 6];
+ r[8 * i + 7] = s[8 * i + 7];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, res1, res2, res3;
+ short res_ref[SIZE];
+ int i, sign = 1;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * i * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = DEFAULT_VALUE;
+ res3.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_shufflelo_epi16) (s1.x, 0xec);
+ res2.x =
+ INTRINSIC (_mask_shufflelo_epi16) (res2.x, mask, s1.x, 0xec);
+ res3.x = INTRINSIC (_maskz_shufflelo_epi16) (mask, s1.x, 0xec);
+
+ CALC (s1.a, 0xec, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpslldq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpslldq-1.c
new file mode 100644
index 00000000000..4964c1df895
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpslldq-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpslldq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+extern volatile __m512i x;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_bslli_epi128 (x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsllvw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllvw-1.c
new file mode 100644
index 00000000000..bdf0da64e50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllvw-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsllvw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsllvw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsllvw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsllvw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_sllv_epi16 (z, z);
+ z = _mm512_mask_sllv_epi16 (z, m1, z, z);
+ z = _mm512_maskz_sllv_epi16 (m1, z, z);
+ y = _mm256_sllv_epi16 (y, y);
+ y = _mm256_mask_sllv_epi16 (y, m2, y, y);
+ y = _mm256_maskz_sllv_epi16 (m2, y, y);
+ x = _mm_sllv_epi16 (x, x);
+ x = _mm_mask_sllv_epi16 (x, m3, x, x);
+ x = _mm_maskz_sllv_epi16 (m3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsllvw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllvw-2.c
new file mode 100644
index 00000000000..e8a5f20b657
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllvw-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *s1, short *s2, short *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; ++i)
+ {
+ r[i] = ((unsigned short) s1[i]) << s2[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = i >> 2;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_sllv_epi16) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_sllv_epi16) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_sllv_epi16) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsllw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllw-1.c
new file mode 100644
index 00000000000..42457fb0107
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 7 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+volatile __m512i x;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __m128i y;
+volatile __mmask32 m;
+volatile __mmask16 m256;
+volatile __mmask8 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_sll_epi16 (x, y);
+ x = _mm512_mask_sll_epi16 (x, m, x, y);
+ x = _mm512_maskz_sll_epi16 (m, x, y);
+ x256 = _mm256_mask_sll_epi16 (x256, m256, x256, y);
+ x256 = _mm256_maskz_sll_epi16 (m256, x256, y);
+ x128 = _mm_mask_sll_epi16 (x128, m128, x128, y);
+ x128 = _mm_maskz_sll_epi16 (m128, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsllw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllw-2.c
new file mode 100644
index 00000000000..7c74742161a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllw-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void CALC (short *r, short *s1, long long *s2)
+{
+ int i;
+ long long count = s2[0];
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 16 ? (s1[i] << count) : 0;
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1;
+ UNION_TYPE (128, i_q) src2;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+
+ long long imm;
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ sign = sign * -1;
+ }
+ for (i = 0; i < 2; i++)
+ {
+ src2.a[i] = 0;
+ }
+
+ for (imm = 1; imm <= 17; imm++)
+ {
+ src2.a[0] = imm;
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sll_epi16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sll_epi16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sll_epi16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsllwi-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllwi-1.c
new file mode 100644
index 00000000000..83d4c04710e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllwi-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+volatile __m512i x;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask32 m;
+volatile __mmask16 m256;
+volatile __mmask8 m128;
+#define y 7
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_slli_epi16 (x, y);
+ x = _mm512_mask_slli_epi16 (x, m, x, y);
+ x = _mm512_maskz_slli_epi16 (m, x, y);
+ x256 = _mm256_mask_slli_epi16 (x256, m256, x256, y);
+ x256 = _mm256_maskz_slli_epi16 (m256, x256, y);
+ x128 = _mm_mask_slli_epi16 (x128, m128, x128, y);
+ x128 = _mm_maskz_slli_epi16 (m128, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsllwi-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllwi-2.c
new file mode 100644
index 00000000000..21898f5cdb6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsllwi-2.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void CALC (short *r, short *s1, short count)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 16 ? (s1[i] << count) : 0;
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ sign = sign * -1;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_slli_epi16) (src1.x, 5);
+ res2.x = INTRINSIC (_mask_slli_epi16) (res2.x, mask, src1.x, 5);
+ res3.x = INTRINSIC (_maskz_slli_epi16) (mask, src1.x, 5);
+
+ CALC (res_ref, src1.a, 5);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_slli_epi16) (src1.x, 17);
+ res2.x = INTRINSIC (_mask_slli_epi16) (res2.x, mask, src1.x, 17);
+ res3.x = INTRINSIC (_maskz_slli_epi16) (mask, src1.x, 17);
+
+ CALC (res_ref, src1.a, 17);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsravw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsravw-1.c
new file mode 100644
index 00000000000..70db2fda453
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsravw-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsravw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsravw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsravw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsravw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_srav_epi16 (z, z);
+ z = _mm512_mask_srav_epi16 (z, m1, z, z);
+ z = _mm512_maskz_srav_epi16 (m1, z, z);
+ y = _mm256_srav_epi16 (y, y);
+ y = _mm256_mask_srav_epi16 (y, m2, y, y);
+ y = _mm256_maskz_srav_epi16 (m2, y, y);
+ x = _mm_srav_epi16 (x, x);
+ x = _mm_mask_srav_epi16 (x, m3, x, x);
+ x = _mm_maskz_srav_epi16 (m3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsravw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsravw-2.c
new file mode 100644
index 00000000000..225d732a7b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsravw-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *s1, short *s2, short *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; ++i)
+ {
+      r[i] = s2[i] < 16 ? (s1[i] >> s2[i]) : (s1[i] >= 0 ? 0 : 0xFFFF);
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = i >> 2;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_srav_epi16) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_srav_epi16) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_srav_epi16) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-1.c
new file mode 100644
index 00000000000..667825bd7eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_sra_epi16 (z, x);
+ z = _mm512_mask_sra_epi16 (z, m1, z, x);
+ z = _mm512_maskz_sra_epi16 (m1, z, x);
+ y = _mm256_mask_sra_epi16 (y, m2, y, x);
+ y = _mm256_maskz_sra_epi16 (m2, y, x);
+ x = _mm_mask_sra_epi16 (x, m3, x, x);
+ x = _mm_maskz_sra_epi16 (m3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-2.c
new file mode 100644
index 00000000000..f1649c23542
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-2.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+#include <string.h>
+
+#define N 0x5
+
+void
+CALC (short *s1, long long int *s2, short *r)
+{
+ int i;
+ long long int count = s2[0];
+
+  memset (r, 0, SIZE * sizeof (short));
+
+ if (count < 16)
+ for (i = 0; i < SIZE; ++i)
+ r[i] = s1[i] >> count;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ union128i_q s2;
+ short res_ref[SIZE];
+  int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ s2.a[0] = N;
+
+ res1.x = INTRINSIC (_sra_epi16) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_sra_epi16) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_sra_epi16) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsrawi-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrawi-1.c
new file mode 100644
index 00000000000..ebb9fa9f4a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrawi-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*13\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*13\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*13\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*13\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_srai_epi16 (z, 13);
+ z = _mm512_mask_srai_epi16 (z, m1, z, 13);
+ z = _mm512_maskz_srai_epi16 (m1, z, 13);
+ y = _mm256_mask_srai_epi16 (y, m2, y, 13);
+ y = _mm256_maskz_srai_epi16 (m2, y, 13);
+ x = _mm_mask_srai_epi16 (x, m3, x, 13);
+ x = _mm_maskz_srai_epi16 (m3, x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsrawi-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrawi-2.c
new file mode 100644
index 00000000000..b72b806a482
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrawi-2.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+#include <string.h>
+
+#define N 0x5
+
+void
+CALC (short *s1, short *r)
+{
+ int i;
+
+  memset (r, 0, SIZE * sizeof (short));
+
+ if (N < 16)
+ for (i = 0; i < SIZE; ++i)
+ r[i] = s1[i] >> N;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ union128i_q s2;
+ short res_ref[SIZE];
+  int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ sign = -sign;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_srai_epi16) (s1.x, N);
+ res2.x = INTRINSIC (_mask_srai_epi16) (res2.x, mask, s1.x, N);
+ res3.x = INTRINSIC (_maskz_srai_epi16) (mask, s1.x, N);
+
+ CALC (s1.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsrldq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrldq-1.c
new file mode 100644
index 00000000000..d9424f51069
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrldq-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpsrldq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+extern volatile __m512i x;
+
+void extern
+avx512bw_test (void)
+{
+ x = _mm512_bsrli_epi128 (x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlvw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlvw-1.c
new file mode 100644
index 00000000000..a94b7cfa30e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlvw-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsrlvw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlvw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlvw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlvw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_srlv_epi16 (z, z);
+ z = _mm512_mask_srlv_epi16 (z, m1, z, z);
+ z = _mm512_maskz_srlv_epi16 (m1, z, z);
+ y = _mm256_srlv_epi16 (y, y);
+ y = _mm256_mask_srlv_epi16 (y, m2, y, y);
+ y = _mm256_maskz_srlv_epi16 (m2, y, y);
+ x = _mm_srlv_epi16 (x, x);
+ x = _mm_mask_srlv_epi16 (x, m3, x, x);
+ x = _mm_maskz_srlv_epi16 (m3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlvw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlvw-2.c
new file mode 100644
index 00000000000..eb98685770b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlvw-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (short *s1, short *s2, short *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; ++i)
+ {
+ r[i] = ((unsigned short) s1[i]) >> s2[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = i * sign;
+ s2.a[i] = i >> 2;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_srlv_epi16) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_srlv_epi16) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_srlv_epi16) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlw-1.c
new file mode 100644
index 00000000000..584f30346d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlw-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_srl_epi16 (z, x);
+ z = _mm512_mask_srl_epi16 (z, m1, z, x);
+ z = _mm512_maskz_srl_epi16 (m1, z, x);
+ y = _mm256_mask_srl_epi16 (y, m2, y, x);
+ y = _mm256_maskz_srl_epi16 (m2, y, x);
+ x = _mm_mask_srl_epi16 (x, m3, x, x);
+ x = _mm_maskz_srl_epi16 (m3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlw-2.c
new file mode 100644
index 00000000000..611a8a84b83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlw-2.c
@@ -0,0 +1,82 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (unsigned short *r, unsigned short *s1, unsigned short *s2)
+{
+ int i;
+ unsigned short count = s2[0];
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 16 ? (s1[i] >> count) : 0;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1;
+ UNION_TYPE (128, i_w) src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ }
+
+ for (i = 0; i < 128 / 16; i++)
+ {
+ src2.a[i] = 0;
+ }
+
+ src2.a[0] = 1;
+ src2.a[1] = 0;
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srl_epi16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_srl_epi16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_srl_epi16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+
+ src2.a[0] = 17;
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srl_epi16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_srl_epi16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_srl_epi16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlwi-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlwi-1.c
new file mode 100644
index 00000000000..0ca04050e4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlwi-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*13\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*13\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*13\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*13\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*13\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ z = _mm512_srli_epi16 (z, 13);
+ z = _mm512_mask_srli_epi16 (z, m1, z, 13);
+ z = _mm512_maskz_srli_epi16 (m1, z, 13);
+ y = _mm256_mask_srli_epi16 (y, m2, y, 13);
+ y = _mm256_maskz_srli_epi16 (m2, y, 13);
+ x = _mm_mask_srli_epi16 (x, m3, x, 13);
+ x = _mm_maskz_srli_epi16 (m3, x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlwi-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlwi-2.c
new file mode 100644
index 00000000000..1dfe6448af8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsrlwi-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+void
+CALC (unsigned short *r, unsigned short *s1, unsigned short count)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = count < 16 ? (s1[i] >> count) : 0;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ }
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srli_epi16) (src1.x, 5);
+ res2.x = INTRINSIC (_mask_srli_epi16) (res2.x, mask, src1.x, 5);
+ res3.x = INTRINSIC (_maskz_srli_epi16) (mask, src1.x, 5);
+
+ CALC (res_ref, src1.a, 5);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_srli_epi16) (src1.x, 17);
+ res2.x = INTRINSIC (_mask_srli_epi16) (res2.x, mask, src1.x, 17);
+ res3.x = INTRINSIC (_maskz_srli_epi16) (mask, src1.x, 17);
+
+ CALC (res_ref, src1.a, 17);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubb-1.c
new file mode 100644
index 00000000000..6ce3113845a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubb-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsubb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsubb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask64 m512;
+volatile __mmask32 m256;
+volatile __mmask16 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_sub_epi8 (x512, x512);
+ x512 = _mm512_mask_sub_epi8 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_sub_epi8 (m512, x512, x512);
+ x256 = _mm256_mask_sub_epi8 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_sub_epi8 (m256, x256, x256);
+ x128 = _mm_mask_sub_epi8 (x128, m128, x128, x128);
+ x128 = _mm_maskz_sub_epi8 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubb-2.c
new file mode 100644
index 00000000000..00d9ec97671
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubb-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+void CALC (char *r, char *s1, char *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] - s2[i];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ src2.a[i] = 3 + sign * 11 * (i % 377) * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sub_epi8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sub_epi8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sub_epi8) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsb-1.c
new file mode 100644
index 00000000000..cf27ca0dc43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsb-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsubsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsubsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubsb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubsb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubsb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask64 m512;
+volatile __mmask32 m256;
+volatile __mmask16 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_subs_epi8 (x512, x512);
+ x512 = _mm512_mask_subs_epi8 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_subs_epi8 (m512, x512, x512);
+ x256 = _mm256_mask_subs_epi8 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_subs_epi8 (m256, x256, x256);
+ x128 = _mm_mask_subs_epi8 (x128, m128, x128, x128);
+ x128 = _mm_maskz_subs_epi8 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsb-2.c
new file mode 100644
index 00000000000..bb53926ef60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsb-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+void CALC (char *r, char *s1, char *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ int tmp = (int)s1[i] - (int)s2[i];
+ if (tmp > 0x7F) tmp = 0x7F;
+ if (tmp < (char)0x80) tmp = (char)0x80;
+ r[i] = (char)tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ src2.a[i] = 3 + sign * 11 * (i % 377) * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_subs_epi8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_subs_epi8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_subs_epi8) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsw-1.c
new file mode 100644
index 00000000000..583fc791594
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsubsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsubsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubsw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubsw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubsw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask32 m512;
+volatile __mmask16 m256;
+volatile __mmask8 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_subs_epi16 (x512, x512);
+ x512 = _mm512_mask_subs_epi16 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_subs_epi16 (m512, x512, x512);
+ x256 = _mm256_mask_subs_epi16 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_subs_epi16 (m256, x256, x256);
+ x128 = _mm_mask_subs_epi16 (x128, m128, x128, x128);
+ x128 = _mm_maskz_subs_epi16 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsw-2.c
new file mode 100644
index 00000000000..d654f4b80a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubsw-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (short *r, short *s1, short *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ int tmp = (int)s1[i] - (int)s2[i];
+ if (tmp > 0x7FFF) tmp = 0x7FFF;
+ if (tmp < (short)0x8000) tmp = (short)0x8000;
+ r[i] = (short)tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ src2.a[i] = 3 + sign * 11 * (i % 377) * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_subs_epi16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_subs_epi16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_subs_epi16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusb-1.c
new file mode 100644
index 00000000000..0012e92de1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusb-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsubusb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsubusb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubusb\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubusb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubusb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubusb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubusb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubusb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubusb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask64 m512;
+volatile __mmask32 m256;
+volatile __mmask16 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_subs_epu8 (x512, x512);
+ x512 = _mm512_mask_subs_epu8 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_subs_epu8 (m512, x512, x512);
+ x256 = _mm256_mask_subs_epu8 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_subs_epu8 (m256, x256, x256);
+ x128 = _mm_mask_subs_epu8 (x128, m128, x128, x128);
+ x128 = _mm_maskz_subs_epu8 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusb-2.c
new file mode 100644
index 00000000000..f282919e6a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusb-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (unsigned char *r, unsigned char *s1, unsigned char *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ int tmp = (int)s1[i] - (int)s2[i];
+ if (tmp > 0xFF) tmp = 0xFF;
+ if (tmp < 0) tmp = 0;
+ r[i] = (unsigned char)tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned char res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ src2.a[i] = 3 + 11 * (i % 377) * i;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_subs_epu8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_subs_epu8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_subs_epu8) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusw-1.c
new file mode 100644
index 00000000000..c5f448d6909
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsubusw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsubusw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubusw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubusw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubusw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubusw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubusw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubusw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubusw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask32 m512;
+volatile __mmask16 m256;
+volatile __mmask8 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_subs_epu16 (x512, x512);
+ x512 = _mm512_mask_subs_epu16 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_subs_epu16 (m512, x512, x512);
+ x256 = _mm256_mask_subs_epu16 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_subs_epu16 (m256, x256, x256);
+ x128 = _mm_mask_subs_epu16 (x128, m128, x128, x128);
+ x128 = _mm_maskz_subs_epu16 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusw-2.c
new file mode 100644
index 00000000000..b5d63f330f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubusw-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (unsigned short *r, unsigned short *s1, unsigned short *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ int tmp = (int)s1[i] - (int)s2[i];
+ if (tmp > 0xFFFF) tmp = 0xFFFF;
+ if (tmp < 0) tmp = 0;
+ r[i] = (unsigned short)tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned short res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + 7 * i % 291;
+ src2.a[i] = 3 + 11 * (i % 377) * i;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_subs_epu16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_subs_epu16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_subs_epu16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubw-1.c
new file mode 100644
index 00000000000..68127e7d23e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubw-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsubw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsubw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask32 m512;
+volatile __mmask16 m256;
+volatile __mmask8 m128;
+
+void extern
+avx512bw_test (void)
+{
+ x512 = _mm512_sub_epi16 (x512, x512);
+ x512 = _mm512_mask_sub_epi16 (x512, m512, x512, x512);
+ x512 = _mm512_maskz_sub_epi16 (m512, x512, x512);
+ x256 = _mm256_mask_sub_epi16 (x256, m256, x256, x256);
+ x256 = _mm256_maskz_sub_epi16 (m256, x256, x256);
+ x128 = _mm_mask_sub_epi16 (x128, m128, x128, x128);
+ x128 = _mm_maskz_sub_epi16 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpsubw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubw-2.c
new file mode 100644
index 00000000000..487dcd93450
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpsubw-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (short *r, short *s1, short *s2)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s1[i] - s2[i];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 2 + sign * 7 * i % 291;
+ src2.a[i] = 3 + sign * 11 * (i % 377) * i;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_sub_epi16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_sub_epi16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_sub_epi16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vptestmb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vptestmb-1.c
new file mode 100644
index 00000000000..62137d05bde
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vptestmb-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vptestmb\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestmb\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestmb\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestmb\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestmb\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestmb\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __m512i x512;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+volatile __mmask64 m64;
+
+void extern
+avx512bw_test (void)
+{
+ m16 = _mm_test_epi8_mask (x128, x128);
+ m32 = _mm256_test_epi8_mask (x256, x256);
+ m64 = _mm512_test_epi8_mask (x512, x512);
+ m16 = _mm_mask_test_epi8_mask (3, x128, x128);
+ m32 = _mm256_mask_test_epi8_mask (3, x256, x256);
+ m64 = _mm512_mask_test_epi8_mask (3, x512, x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vptestmb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vptestmb-2.c
new file mode 100644
index 00000000000..d39f593c629
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vptestmb-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *res, char *src1, char *src2)
+{
+ int i;
+ *res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (src1[i] & src2[i])
+ *res = *res | one << i;
+}
+
+void
+TEST (void)
+{
+ int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_b) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i * sign;
+ src2.a[i] = i + 20;
+ sign = -sign;
+ }
+
+ res1 = INTRINSIC (_test_epi8_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_test_epi8_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res1 != res_ref)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res2 != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vptestmw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vptestmw-1.c
new file mode 100644
index 00000000000..8194a11b1b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vptestmw-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vptestmw\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestmw\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestmw\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestmw\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestmw\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestmw\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __m512i x512;
+volatile __mmask8 m8;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+
+void extern
+avx512bw_test (void)
+{
+ m8 = _mm_test_epi16_mask (x128, x128);
+ m16 = _mm256_test_epi16_mask (x256, x256);
+ m32 = _mm512_test_epi16_mask (x512, x512);
+ m8 = _mm_mask_test_epi16_mask (3, x128, x128);
+ m16 = _mm256_mask_test_epi16_mask (3, x256, x256);
+ m32 = _mm512_mask_test_epi16_mask (3, x512, x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vptestmw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vptestmw-2.c
new file mode 100644
index 00000000000..5301dfa325f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vptestmw-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *res, short *src1, short *src2)
+{
+ int i;
+ *res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (src1[i] & src2[i])
+ *res = *res | one << i;
+}
+
+void
+TEST (void)
+{
+ int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i * sign;
+ src2.a[i] = i + 20;
+ sign = -sign;
+ }
+
+ res1 = INTRINSIC (_test_epi16_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_test_epi16_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res1 != res_ref)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res2 != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmb-1.c
new file mode 100644
index 00000000000..bb126fbd2ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmb-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vptestnmb\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestnmb\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestnmb\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestnmb\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestnmb\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestnmb\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __m512i x512;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+volatile __mmask64 m64;
+
+void extern
+avx512bw_test (void)
+{
+ m16 = _mm_testn_epi8_mask (x128, x128);
+ m32 = _mm256_testn_epi8_mask (x256, x256);
+ m64 = _mm512_testn_epi8_mask (x512, x512);
+ m16 = _mm_mask_testn_epi8_mask (3, x128, x128);
+ m32 = _mm256_mask_testn_epi8_mask (3, x256, x256);
+ m64 = _mm512_mask_testn_epi8_mask (3, x512, x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmb-2.c
new file mode 100644
index 00000000000..ba54bd1c6ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmb-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *res, char *src1, char *src2)
+{
+ int i;
+ *res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (!(src1[i] & src2[i]))
+ *res = *res | one << i;
+}
+
+void
+TEST (void)
+{
+ int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_b) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i * sign;
+ src2.a[i] = i + 20;
+ sign = -sign;
+ }
+
+ res1 = INTRINSIC (_testn_epi8_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_testn_epi8_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res1 != res_ref)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res2 != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmw-1.c
new file mode 100644
index 00000000000..82944b56e1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmw-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vptestnmw\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestnmw\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestnmw\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestnmw\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestnmw\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestnmw\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __m512i x512;
+volatile __mmask8 m8;
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+
+void extern
+avx512bw_test (void)
+{
+ m8 = _mm_testn_epi16_mask (x128, x128);
+ m16 = _mm256_testn_epi16_mask (x256, x256);
+ m32 = _mm512_testn_epi16_mask (x512, x512);
+ m8 = _mm_mask_testn_epi16_mask (3, x128, x128);
+ m16 = _mm256_mask_testn_epi16_mask (3, x256, x256);
+ m32 = _mm512_mask_testn_epi16_mask (3, x512, x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmw-2.c
new file mode 100644
index 00000000000..eac1fc5c55e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vptestnmw-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *res, short *src1, short *src2)
+{
+ int i;
+ *res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (!(src1[i] & src2[i]))
+ *res = *res | one << i;
+}
+
+void
+TEST (void)
+{
+ int i, sign = 1;
+ UNION_TYPE (AVX512F_LEN, i_w) src1, src2;
+ MASK_TYPE res_ref, res1, res2;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i * i * sign;
+ src2.a[i] = i + 20;
+ sign = -sign;
+ }
+
+ res1 = INTRINSIC (_testn_epi16_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_testn_epi16_mask) (mask, src1.x, src2.x);
+
+ CALC (&res_ref, src1.a, src2.a);
+
+ if (res1 != res_ref)
+ abort ();
+
+ res_ref &= mask;
+
+ if (res2 != res_ref)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhbw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhbw-1.c
new file mode 100644
index 00000000000..8beb7cc9c1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhbw-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpunpckhbw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhbw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhbw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i d, e, f;
+volatile __m256i x, y, z;
+volatile __m128i a, b, c;
+volatile __mmask64 m1;
+volatile __mmask32 m2;
+volatile __mmask16 m3;
+
+void extern
+avx512bw_test (void)
+{
+ d = _mm512_unpackhi_epi8 (e, f);
+ d = _mm512_mask_unpackhi_epi8 (d, m1, e, f);
+ d = _mm512_maskz_unpackhi_epi8 (m1, e, f);
+ x = _mm256_mask_unpackhi_epi8 (x, m2, y, z);
+ x = _mm256_maskz_unpackhi_epi8 (m2, y, z);
+ a = _mm_mask_unpackhi_epi8 (a, m3, b, c);
+ a = _mm_maskz_unpackhi_epi8 (m3, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhbw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhbw-2.c
new file mode 100644
index 00000000000..6de5c013532
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhbw-2.c
@@ -0,0 +1,68 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (char *r, char *s1, char *s2)
+{
+ int i;
+ for (i = 0; i < SIZE/16; i++)
+ {
+ r[16 * i] = s1[16 * i + 8];
+ r[16 * i + 1] = s2[16 * i + 8];
+ r[16 * i + 2] = s1[16 * i + 9];
+ r[16 * i + 3] = s2[16 * i + 9];
+ r[16 * i + 4] = s1[16 * i + 10];
+ r[16 * i + 5] = s2[16 * i + 10];
+ r[16 * i + 6] = s1[16 * i + 11];
+ r[16 * i + 7] = s2[16 * i + 11];
+ r[16 * i + 8] = s1[16 * i + 12];
+ r[16 * i + 9] = s2[16 * i + 12];
+ r[16 * i + 10] = s1[16 * i + 13];
+ r[16 * i + 11] = s2[16 * i + 13];
+ r[16 * i + 12] = s1[16 * i + 14];
+ r[16 * i + 13] = s2[16 * i + 14];
+ r[16 * i + 14] = s1[16 * i + 15];
+ r[16 * i + 15] = s2[16 * i + 15];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ char res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 34 * i * sign;
+ src2.a[i] = 179 * i; /* fixed: was src1.a[i], leaving src2 uninitialized */
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_unpackhi_epi8) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_unpackhi_epi8) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_unpackhi_epi8) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhwd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhwd-1.c
new file mode 100644
index 00000000000..069cf7fef1c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhwd-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpunpckhwd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhwd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhwd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i d, e, f;
+volatile __m256i x, y, z;
+volatile __m128i a, b, c;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ d = _mm512_unpackhi_epi16 (e, f);
+ d = _mm512_mask_unpackhi_epi16 (d, m1, e, f);
+ d = _mm512_maskz_unpackhi_epi16 (m1, e, f);
+ x = _mm256_mask_unpackhi_epi16 (x, m2, y, z);
+ x = _mm256_maskz_unpackhi_epi16 (m2, y, z);
+ a = _mm_mask_unpackhi_epi16 (a, m3, b, c);
+ a = _mm_maskz_unpackhi_epi16 (m3, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhwd-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhwd-2.c
new file mode 100644
index 00000000000..a1e27588abc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpckhwd-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (short *r, short *s1, short *s2)
+{
+ int i;
+ for (i = 0; i < SIZE/8; i++)
+ {
+ r[8 * i] = s1[8 * i + 4];
+ r[8 * i + 1] = s2[8 * i + 4];
+ r[8 * i + 2] = s1[8 * i + 5];
+ r[8 * i + 3] = s2[8 * i + 5];
+ r[8 * i + 4] = s1[8 * i + 6];
+ r[8 * i + 5] = s2[8 * i + 6];
+ r[8 * i + 6] = s1[8 * i + 7];
+ r[8 * i + 7] = s2[8 * i + 7];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ short res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = 34 * i * sign;
+ src2.a[i] = 179 * i; /* fixed: was src1.a[i], leaving src2 uninitialized */
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_unpackhi_epi16) (src1.x, src2.x);
+ res2.x = INTRINSIC (_mask_unpackhi_epi16) (res2.x, mask, src1.x, src2.x);
+ res3.x = INTRINSIC (_maskz_unpackhi_epi16) (mask, src1.x, src2.x);
+
+ CALC (res_ref, src1.a, src2.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklbw-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklbw-1.c
new file mode 100644
index 00000000000..34ed46d4cc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklbw-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpunpcklbw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklbw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklbw\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklbw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklbw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i d, e, f;
+volatile __m256i x, y, z;
+volatile __m128i a, b, c;
+volatile __mmask64 m1;
+volatile __mmask32 m2;
+volatile __mmask16 m3;
+
+void extern
+avx512bw_test (void)
+{
+ d = _mm512_unpacklo_epi8 (e, f);
+ d = _mm512_mask_unpacklo_epi8 (d, m1, e, f);
+ d = _mm512_maskz_unpacklo_epi8 (m1, e, f);
+ x = _mm256_mask_unpacklo_epi8 (x, m2, y, z);
+ x = _mm256_maskz_unpacklo_epi8 (m2, y, z);
+ a = _mm_mask_unpacklo_epi8 (a, m3, b, c);
+ a = _mm_maskz_unpacklo_epi8 (m3, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklbw-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklbw-2.c
new file mode 100644
index 00000000000..aea8839ec6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklbw-2.c
@@ -0,0 +1,68 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+CALC (char *r, char *s1, char *s2)
+{
+ int i;
+ for (i = 0; i < SIZE/16; i++)
+ {
+ r[16 * i] = s1[16 * i];
+ r[16 * i + 1] = s2[16 * i];
+ r[16 * i + 2] = s1[16 * i + 1];
+ r[16 * i + 3] = s2[16 * i + 1];
+ r[16 * i + 4] = s1[16 * i + 2];
+ r[16 * i + 5] = s2[16 * i + 2];
+ r[16 * i + 6] = s1[16 * i + 3];
+ r[16 * i + 7] = s2[16 * i + 3];
+ r[16 * i + 8] = s1[16 * i + 4];
+ r[16 * i + 9] = s2[16 * i + 4];
+ r[16 * i + 10] = s1[16 * i + 5];
+ r[16 * i + 11] = s2[16 * i + 5];
+ r[16 * i + 12] = s1[16 * i + 6];
+ r[16 * i + 13] = s2[16 * i + 6];
+ r[16 * i + 14] = s1[16 * i + 7];
+ r[16 * i + 15] = s2[16 * i + 7];
+ }
+}
+
+void
+TEST (void)
+{
+  int i, sign;
+  UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  char res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 34 * i * sign;
+      src2.a[i] = 179 * i;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
+  res1.x = INTRINSIC (_unpacklo_epi8) (src1.x, src2.x);
+  res2.x = INTRINSIC (_mask_unpacklo_epi8) (res2.x, mask, src1.x, src2.x);
+  res3.x = INTRINSIC (_maskz_unpacklo_epi8) (mask, src1.x, src2.x);
+
+  CALC (res_ref, src1.a, src2.a);
+
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_b) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklwd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklwd-1.c
new file mode 100644
index 00000000000..79963113736
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklwd-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpunpcklwd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklwd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklwd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklwd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i d, e, f;
+volatile __m256i x, y, z;
+volatile __m128i a, b, c;
+volatile __mmask32 m1;
+volatile __mmask16 m2;
+volatile __mmask8 m3;
+
+void extern
+avx512bw_test (void)
+{
+ d = _mm512_unpacklo_epi16 (e, f);
+ d = _mm512_mask_unpacklo_epi16 (d, m1, e, f);
+ d = _mm512_maskz_unpacklo_epi16 (m1, e, f);
+ x = _mm256_mask_unpacklo_epi16 (x, m2, y, z);
+ x = _mm256_maskz_unpacklo_epi16 (m2, y, z);
+ a = _mm_mask_unpacklo_epi16 (a, m3, b, c);
+ a = _mm_maskz_unpacklo_epi16 (m3, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklwd-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklwd-2.c
new file mode 100644
index 00000000000..c771bd9bced
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpunpcklwd-2.c
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -DAVX512BW" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+CALC (short *r, short *s1, short *s2)
+{
+ int i;
+ for (i = 0; i < SIZE/8; i++)
+ {
+ r[8 * i] = s1[8 * i];
+ r[8 * i + 1] = s2[8 * i];
+ r[8 * i + 2] = s1[8 * i + 1];
+ r[8 * i + 3] = s2[8 * i + 1];
+ r[8 * i + 4] = s1[8 * i + 2];
+ r[8 * i + 5] = s2[8 * i + 2];
+ r[8 * i + 6] = s1[8 * i + 3];
+ r[8 * i + 7] = s2[8 * i + 3];
+ }
+}
+
+void
+TEST (void)
+{
+  int i, sign;
+  UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1, src2;
+  MASK_TYPE mask = MASK_VALUE;
+  short res_ref[SIZE];
+
+  sign = -1;
+  for (i = 0; i < SIZE; i++)
+    {
+      src1.a[i] = 34 * i * sign;
+      src2.a[i] = 179 * i;
+      sign = sign * -1;
+    }
+  for (i = 0; i < SIZE; i++)
+    res2.a[i] = DEFAULT_VALUE;
+
+  res1.x = INTRINSIC (_unpacklo_epi16) (src1.x, src2.x);
+  res2.x = INTRINSIC (_mask_unpacklo_epi16) (res2.x, mask, src1.x, src2.x);
+  res3.x = INTRINSIC (_maskz_unpacklo_epi16) (mask, src1.x, src2.x);
+
+  CALC (res_ref, src1.a, src2.a);
+
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+    abort ();
+
+  MASK_MERGE (i_w) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+    abort ();
+
+  MASK_ZERO (i_w) (res_ref, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-check.h b/gcc/testsuite/gcc.target/i386/avx512dq-check.h
new file mode 100644
index 00000000000..e8dcf4b7d9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-check.h
@@ -0,0 +1,47 @@
+#include <stdlib.h>
+#include "cpuid.h"
+#include "m512-check.h"
+#include "avx512f-os-support.h"
+
+static void avx512dq_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{
+ avx512dq_test ();
+}
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ /* Run AVX512DQ test only if host has AVX512DQ support. */
+ if ((ecx & bit_OSXSAVE) == (bit_OSXSAVE))
+ {
+ if (__get_cpuid_max (0, NULL) < 7)
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ if ((avx512f_os_support ()) && ((ebx & bit_AVX512DQ) == bit_AVX512DQ))
+ {
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ return 0;
+ }
+#ifdef DEBUG
+ printf ("SKIPPED\n");
+#endif
+ }
+#ifdef DEBUG
+ else
+ printf ("SKIPPED\n");
+#endif
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vandnpd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vandnpd-1.c
new file mode 100644
index 00000000000..bb6cf9250e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vandnpd-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vandnpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandnpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d z;
+volatile __m256d y;
+volatile __m128d x;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ z = _mm512_andnot_pd (z, z);
+ z = _mm512_mask_andnot_pd (z, m, z, z);
+ z = _mm512_maskz_andnot_pd (m, z, z);
+ y = _mm256_mask_andnot_pd (y, m, y, y);
+ y = _mm256_maskz_andnot_pd (m, y, y);
+ x = _mm_mask_andnot_pd (x, m, x, x);
+ x = _mm_maskz_andnot_pd (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vandnpd-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vandnpd-2.c
new file mode 100644
index 00000000000..88e52e0d891
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vandnpd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (double *s1, double *s2, double *r)
+{
+ int i;
+ long long tmp;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ tmp = (~(*(long long *) &s1[i])) & (*(long long *) &s2[i]);
+ r[i] = *(double *) &tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 13. * i;
+ s2.a[i] = 17. * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_andnot_pd) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_andnot_pd) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_andnot_pd) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vandnps-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vandnps-1.c
new file mode 100644
index 00000000000..6a76a2564c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vandnps-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vandnps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandnps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 z;
+volatile __m256 y;
+volatile __m128 x;
+volatile __mmask16 m1;
+volatile __mmask8 m2;
+
+void extern
+avx512dq_test (void)
+{
+ z = _mm512_andnot_ps (z, z);
+ z = _mm512_mask_andnot_ps (z, m1, z, z);
+ z = _mm512_maskz_andnot_ps (m1, z, z);
+ y = _mm256_mask_andnot_ps (y, m2, y, y);
+ y = _mm256_maskz_andnot_ps (m2, y, y);
+ x = _mm_mask_andnot_ps (x, m2, x, x);
+ x = _mm_maskz_andnot_ps (m2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vandnps-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vandnps-2.c
new file mode 100644
index 00000000000..14df2fbdef2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vandnps-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void
+CALC (float *s1, float *s2, float *r)
+{
+ int i;
+ int tmp;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ tmp = (~(*(int *) &s1[i])) & (*(int *) &s2[i]);
+ r[i] = *(float *) &tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 13. * i;
+ s2.a[i] = 17. * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_andnot_ps) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_andnot_ps) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_andnot_ps) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vandpd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vandpd-1.c
new file mode 100644
index 00000000000..212754d425f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vandpd-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vandpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vandpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vandpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vandpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vandpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vandpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d z;
+volatile __m256d y;
+volatile __m128d x;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ z = _mm512_and_pd (z, z);
+ z = _mm512_mask_and_pd (z, m, z, z);
+ z = _mm512_maskz_and_pd (m, z, z);
+ y = _mm256_mask_and_pd (y, m, y, y);
+ y = _mm256_maskz_and_pd (m, y, y);
+ x = _mm_mask_and_pd (x, m, x, x);
+ x = _mm_maskz_and_pd (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vandpd-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vandpd-2.c
new file mode 100644
index 00000000000..e5a73658b10
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vandpd-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (double *s1, double *s2, double *r)
+{
+ int i;
+ long long tmp;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ tmp = (*(long long *) &s1[i]) & (*(long long *) &s2[i]);
+ r[i] = *(double *) &tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 13. * i;
+ s2.a[i] = 17. * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_and_pd) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_and_pd) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_and_pd) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vandps-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vandps-1.c
new file mode 100644
index 00000000000..e41a7db041b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vandps-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vandps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vandps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vandps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vandps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vandps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vandps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vandps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 z;
+volatile __m256 y;
+volatile __m128 x;
+volatile __mmask16 m1;
+volatile __mmask8 m2;
+
+void extern
+avx512dq_test (void)
+{
+ z = _mm512_and_ps (z, z);
+ z = _mm512_mask_and_ps (z, m1, z, z);
+ z = _mm512_maskz_and_ps (m1, z, z);
+ y = _mm256_mask_and_ps (y, m2, y, y);
+ y = _mm256_maskz_and_ps (m2, y, y);
+ x = _mm_mask_and_ps (x, m2, x, x);
+ x = _mm_maskz_and_ps (m2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vandps-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vandps-2.c
new file mode 100644
index 00000000000..013e1ecc7a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vandps-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void
+CALC (float *s1, float *s2, float *r)
+{
+ int i;
+ int tmp;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ tmp = (*(int *) &s1[i]) & (*(int *) &s2[i]);
+ r[i] = *(float *) &tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 13. * i;
+ s2.a[i] = 17. * i;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_and_ps) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_and_ps) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_and_ps) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x2-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x2-1.c
new file mode 100644
index 00000000000..6f0072c7770
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x2-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m256 y;
+volatile __m128 z;
+volatile __mmask16 mx;
+volatile __mmask8 my;
+
+void extern
+avx512dq_test (void)
+{
+ x = _mm512_broadcast_f32x2 (z);
+ x = _mm512_mask_broadcast_f32x2 (x, mx, z);
+ x = _mm512_maskz_broadcast_f32x2 (mx, z);
+ y = _mm256_broadcast_f32x2 (z);
+ y = _mm256_mask_broadcast_f32x2 (y, my, z);
+ y = _mm256_maskz_broadcast_f32x2 (my, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x2-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x2-2.c
new file mode 100644
index 00000000000..aa78be158f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x2-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (float *r, float *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i % 2];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3;
+ UNION_TYPE (128,) src;
+ MASK_TYPE mask = SIZE | 123;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 4; i++)
+ {
+ src.a[i] = 34.67 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcast_f32x2) (src.x);
+ res2.x = INTRINSIC (_mask_broadcast_f32x2) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcast_f32x2) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x8-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x8-1.c
new file mode 100644
index 00000000000..d041bba4937
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x8-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x8\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]|vshuff32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x8\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshuff32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x8\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}|vshuff32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m256 y;
+volatile __mmask16 m;
+
+void extern
+avx512dq_test (void)
+{
+ x = _mm512_broadcast_f32x8 (y);
+ x = _mm512_mask_broadcast_f32x8 (x, m, y);
+ x = _mm512_maskz_broadcast_f32x8 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x8-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x8-2.c
new file mode 100644
index 00000000000..eda3baf6da6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf32x8-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (float *r, float *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i % 8];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN,) res1, res2, res3;
+ UNION_TYPE (256,) src;
+ MASK_TYPE mask = SIZE | 123;
+ float res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 34.67 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcast_f32x8) (src.x);
+ res2.x = INTRINSIC (_mask_broadcast_f32x8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcast_f32x8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf64x2-1.c
new file mode 100644
index 00000000000..c240ccee5fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf64x2-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastf64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]|vshuff64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshuff64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}|vshuff64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]|vshuff64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshuff64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}|vshuff64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x;
+volatile __m256d y;
+volatile __m128d z;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x = _mm512_broadcast_f64x2 (z);
+ x = _mm512_mask_broadcast_f64x2 (x, m, z);
+ x = _mm512_maskz_broadcast_f64x2 (m, z);
+ y = _mm256_broadcast_f64x2 (z);
+ y = _mm256_mask_broadcast_f64x2 (y, m, z);
+ y = _mm256_maskz_broadcast_f64x2 (m, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf64x2-2.c
new file mode 100644
index 00000000000..d148fe5750b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcastf64x2-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (double *r, double *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i % 2];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3;
+ UNION_TYPE (128, d) src;
+ MASK_TYPE mask = SIZE | 123;
+ double res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 2; i++)
+ {
+ src.a[i] = 34.67 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcast_f64x2) (src.x);
+ res2.x = INTRINSIC (_mask_broadcast_f64x2) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcast_f64x2) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x2-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x2-1.c
new file mode 100644
index 00000000000..95cfcbd2521
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x2-1.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x2\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x2\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x2\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask16 mx;
+volatile __mmask8 my;
+
+void extern
+avx512dq_test (void)
+{
+ x = _mm512_broadcast_i32x2 (z);
+ x = _mm512_mask_broadcast_i32x2 (x, mx, z);
+ x = _mm512_maskz_broadcast_i32x2 (mx, z);
+ y = _mm256_broadcast_i32x2 (z);
+ y = _mm256_mask_broadcast_i32x2 (y, my, z);
+ y = _mm256_maskz_broadcast_i32x2 (my, z);
+ z = _mm_broadcast_i32x2 (z);
+ z = _mm_mask_broadcast_i32x2 (z, my, z);
+ z = _mm_maskz_broadcast_i32x2 (my, z);
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x2-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x2-2.c
new file mode 100644
index 00000000000..f508d861303
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x2-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (int *r, int *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i % 2];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ UNION_TYPE (128, i_d) src;
+ MASK_TYPE mask = SIZE | 123;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 4; i++)
+ {
+ src.a[i] = 34 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcast_i32x2) (src.x);
+ res2.x = INTRINSIC (_mask_broadcast_i32x2) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcast_i32x2) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x8-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x8-1.c
new file mode 100644
index 00000000000..b9e05ea2734
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x8-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x8\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]|vshufi32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x8\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshufi32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x8\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}|vshufi32x4\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __mmask16 m;
+
+void extern
+avx512dq_test (void)
+{
+ x = _mm512_broadcast_i32x8 (y);
+ x = _mm512_mask_broadcast_i32x8 (x, m, y);
+ x = _mm512_maskz_broadcast_i32x8 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x8-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x8-2.c
new file mode 100644
index 00000000000..bbed9aeaed3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti32x8-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (int *r, int *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i % 8];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
+ UNION_TYPE (256, i_d) src;
+ MASK_TYPE mask = SIZE | 123;
+ int res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 8; i++)
+ {
+ src.a[i] = 34 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcast_i32x8) (src.x);
+ res2.x = INTRINSIC (_mask_broadcast_i32x8) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcast_i32x8) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti64x2-1.c
new file mode 100644
index 00000000000..7dd332dafcf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti64x2-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcasti64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]|vshufi64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshufi64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}|vshufi64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]|vshufi64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshufi64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}|vshufi64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x = _mm512_broadcast_i64x2 (z);
+ x = _mm512_mask_broadcast_i64x2 (x, m, z);
+ x = _mm512_maskz_broadcast_i64x2 (m, z);
+ y = _mm256_broadcast_i64x2 (z);
+ y = _mm256_mask_broadcast_i64x2 (y, m, z);
+ y = _mm256_maskz_broadcast_i64x2 (m, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti64x2-2.c
new file mode 100644
index 00000000000..ca560d9b515
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vbroadcasti64x2-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (long long *r, long long *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = s[i % 2];
+ }
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ UNION_TYPE (128, i_q) src;
+ MASK_TYPE mask = SIZE | 123;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < 2; i++)
+ {
+ src.a[i] = 34 * i * sign;
+ sign = sign * -1;
+ }
+ for (i = 0; i < SIZE; i++)
+ res2.a[i] = DEFAULT_VALUE;
+
+ res1.x = INTRINSIC (_broadcast_i64x2) (src.x);
+ res2.x = INTRINSIC (_mask_broadcast_i64x2) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_broadcast_i64x2) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2qq-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2qq-1.c
new file mode 100644
index 00000000000..16f2c6c2dcf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2qq-1.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2qq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d s1;
+volatile __m256d s2;
+volatile __m128d s3;
+volatile __m512i res1;
+volatile __m256i res2;
+volatile __m128i res3;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ res1 = _mm512_cvtpd_epi64 (s1);
+ res2 = _mm256_cvtpd_epi64 (s2);
+ res3 = _mm_cvtpd_epi64 (s3);
+
+ res1 = _mm512_mask_cvtpd_epi64 (res1, m, s1);
+ res2 = _mm256_mask_cvtpd_epi64 (res2, m, s2);
+ res3 = _mm_mask_cvtpd_epi64 (res3, m, s3);
+
+ res1 = _mm512_maskz_cvtpd_epi64 (m, s1);
+ res2 = _mm256_maskz_cvtpd_epi64 (m, s2);
+ res3 = _mm_maskz_cvtpd_epi64 (m, s3);
+
+ res1 = _mm512_cvt_roundpd_epi64 (s1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ res1 = _mm512_mask_cvt_roundpd_epi64 (res1, m, s1, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+ res1 = _mm512_maskz_cvt_roundpd_epi64 (m, s1, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2qq-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2qq-2.c
new file mode 100644
index 00000000000..0e30bfe99cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2qq-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (double *s, long long *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (s[i] >= 0) ? (long long) (s[i] + 0.5)
+ : (long long) (s[i] - 0.5);
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_cvtpd_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvtpd_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtpd_epi64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2uqq-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2uqq-1.c
new file mode 100644
index 00000000000..c53e41d8bef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2uqq-1.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2uqq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d s1;
+volatile __m256d s2;
+volatile __m128d s3;
+volatile __m512i res1;
+volatile __m256i res2;
+volatile __m128i res3;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ res1 = _mm512_cvtpd_epu64 (s1);
+ res2 = _mm256_cvtpd_epu64 (s2);
+ res3 = _mm_cvtpd_epu64 (s3);
+
+ res1 = _mm512_mask_cvtpd_epu64 (res1, m, s1);
+ res2 = _mm256_mask_cvtpd_epu64 (res2, m, s2);
+ res3 = _mm_mask_cvtpd_epu64 (res3, m, s3);
+
+ res1 = _mm512_maskz_cvtpd_epu64 (m, s1);
+ res2 = _mm256_maskz_cvtpd_epu64 (m, s2);
+ res3 = _mm_maskz_cvtpd_epu64 (m, s3);
+
+ res1 = _mm512_cvt_roundpd_epu64 (s1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ res1 = _mm512_mask_cvt_roundpd_epu64 (res1, m, s1, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+ res1 = _mm512_maskz_cvt_roundpd_epu64 (m, s1, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2uqq-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2uqq-2.c
new file mode 100644
index 00000000000..de1462841b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtpd2uqq-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (double *s, unsigned long long *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (unsigned long long) (s[i] + 0.5);
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtpd_epu64) (s.x);
+ res2.x = INTRINSIC (_mask_cvtpd_epu64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtpd_epu64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2qq-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2qq-1.c
new file mode 100644
index 00000000000..60a631f0fb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2qq-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2qq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x1;
+volatile __m256i x2;
+volatile __m128i x3;
+volatile __m256 z1;
+volatile __m128 z2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x1 = _mm512_cvtps_epi64 (z1);
+ x1 = _mm512_mask_cvtps_epi64 (x1, m, z1);
+ x1 = _mm512_maskz_cvtps_epi64 (m, z1);
+ x2 = _mm256_cvtps_epi64 (z2);
+ x2 = _mm256_mask_cvtps_epi64 (x2, m, z2);
+ x2 = _mm256_maskz_cvtps_epi64 (m, z2);
+ x3 = _mm_cvtps_epi64 (z2);
+ x3 = _mm_mask_cvtps_epi64 (x3, m, z2);
+ x3 = _mm_maskz_cvtps_epi64 (m, z2);
+ x1 = _mm512_cvt_roundps_epi64 (z1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x1 = _mm512_mask_cvt_roundps_epi64 (x1, m, z1, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+ x1 = _mm512_maskz_cvt_roundps_epi64 (m, z1, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2qq-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2qq-2.c
new file mode 100644
index 00000000000..7fef656625d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2qq-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (long long *r, float *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ r[i] = (long long) (s[i] + ((s[i] >= 0) ? 0.5 : -0.5));
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN_HALF,) src;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ res2.a[i] = DEFAULT_VALUE;
+ src.a[i] = 1.5 + 34.67 * i * sign;
+ sign = sign * -1;
+ }
+
+ res1.x = INTRINSIC (_cvtps_epi64) (src.x);
+ res2.x = INTRINSIC (_mask_cvtps_epi64) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtps_epi64) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2uqq-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2uqq-1.c
new file mode 100644
index 00000000000..4f41c4ab647
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2uqq-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2uqq\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x1;
+volatile __m256i x2;
+volatile __m128i x3;
+volatile __m256 z1;
+volatile __m128 z2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x1 = _mm512_cvtps_epu64 (z1);
+ x1 = _mm512_mask_cvtps_epu64 (x1, m, z1);
+ x1 = _mm512_maskz_cvtps_epu64 (m, z1);
+ x2 = _mm256_cvtps_epu64 (z2);
+ x2 = _mm256_mask_cvtps_epu64 (x2, m, z2);
+ x2 = _mm256_maskz_cvtps_epu64 (m, z2);
+ x3 = _mm_cvtps_epu64 (z2);
+ x3 = _mm_mask_cvtps_epu64 (x3, m, z2);
+ x3 = _mm_maskz_cvtps_epu64 (m, z2);
+ x1 = _mm512_cvt_roundps_epu64 (z1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ x1 = _mm512_mask_cvt_roundps_epu64 (x1, m, z1, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+ x1 = _mm512_maskz_cvt_roundps_epu64 (m, z1, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2uqq-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2uqq-2.c
new file mode 100644
index 00000000000..ca341ef1bdf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtps2uqq-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (unsigned long long *r, float *s)
+{
+ int i;
+ for (i = 0; i < SIZE; i++)
+ r[i] = (unsigned long long) (s[i] + ((s[i] >= 0) ? 0.5 : -0.5));
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN_HALF,) src;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res2.a[i] = DEFAULT_VALUE;
+ src.a[i] = 1.5 + 34.67 * i;
+ }
+
+ res1.x = INTRINSIC (_cvtps_epu64) (src.x);
+ res2.x = INTRINSIC (_mask_cvtps_epu64) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvtps_epu64) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2pd-1.c
new file mode 100644
index 00000000000..7ad246c384d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2pd-1.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s1;
+volatile __m256i s2;
+volatile __m128i s3;
+volatile __m512d res1;
+volatile __m256d res2;
+volatile __m128d res3;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ res1 = _mm512_cvtepi64_pd (s1);
+ res1 = _mm512_mask_cvtepi64_pd (res1, m, s1);
+ res1 = _mm512_maskz_cvtepi64_pd (m, s1);
+ res2 = _mm256_cvtepi64_pd (s2);
+ res2 = _mm256_mask_cvtepi64_pd (res2, m, s2);
+ res2 = _mm256_maskz_cvtepi64_pd (m, s2);
+ res3 = _mm_cvtepi64_pd (s3);
+ res3 = _mm_mask_cvtepi64_pd (res3, m, s3);
+ res3 = _mm_maskz_cvtepi64_pd (m, s3);
+ res1 = _mm512_cvt_roundepi64_pd (s1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ res1 = _mm512_mask_cvt_roundepi64_pd (res1, m, s1, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+ res1 = _mm512_maskz_cvt_roundepi64_pd (m, s1, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2pd-2.c
new file mode 100644
index 00000000000..7143415198b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2pd-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (long long *s, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (double) s[i];
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123 * (i + 2000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_cvtepi64_pd) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepi64_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepi64_pd) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2ps-1.c
new file mode 100644
index 00000000000..8007299490b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2ps-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtqq2psx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2psx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2psx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2psy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2psy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2psy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2ps\[ \\t\]+\[^\n\]*\{rd-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtqq2ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s1;
+volatile __m256i s2;
+volatile __m128i s3;
+volatile __m256 res1;
+volatile __m128 res2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ res1 = _mm512_cvtepi64_ps (s1);
+ res1 = _mm512_mask_cvtepi64_ps (res1, m, s1);
+ res1 = _mm512_maskz_cvtepi64_ps (m, s1);
+ res2 = _mm256_cvtepi64_ps (s2);
+ res2 = _mm256_mask_cvtepi64_ps (res2, m, s2);
+ res2 = _mm256_maskz_cvtepi64_ps (m, s2);
+ res2 = _mm_cvtepi64_ps (s3);
+ res2 = _mm_mask_cvtepi64_ps (res2, m, s3);
+ res2 = _mm_maskz_cvtepi64_ps (m, s3);
+ res1 = _mm512_cvt_roundepi64_ps (s1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ res1 = _mm512_mask_cvt_roundepi64_ps (res1, m, s1, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
+ res1 = _mm512_maskz_cvt_roundepi64_ps (m, s1, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2ps-2.c
new file mode 100644
index 00000000000..751c086f6ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtqq2ps-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#define SIZE_HALF (AVX512F_LEN_HALF / 32)
+#include "avx512f-mask-type.h"
+
+void
+CALC (long long *s, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE_HALF; i++)
+ r[i] = (i < SIZE) ? (float) s[i] : 0;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s;
+ UNION_TYPE (AVX512F_LEN_HALF,) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE_HALF];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123 * (i + 2000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_cvtepi64_ps) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepi64_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepi64_ps) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2qq-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2qq-1.c
new file mode 100644
index 00000000000..ec4ccf9f23a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2qq-1.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2qq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d s1;
+volatile __m256d s2;
+volatile __m128d s3;
+volatile __m512i res1;
+volatile __m256i res2;
+volatile __m128i res3;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ res1 = _mm512_cvttpd_epi64 (s1);
+ res2 = _mm256_cvttpd_epi64 (s2);
+ res3 = _mm_cvttpd_epi64 (s3);
+
+ res1 = _mm512_mask_cvttpd_epi64 (res1, m, s1);
+ res2 = _mm256_mask_cvttpd_epi64 (res2, m, s2);
+ res3 = _mm_mask_cvttpd_epi64 (res3, m, s3);
+
+ res1 = _mm512_maskz_cvttpd_epi64 (m, s1);
+ res2 = _mm256_maskz_cvttpd_epi64 (m, s2);
+ res3 = _mm_maskz_cvttpd_epi64 (m, s3);
+
+ res1 = _mm512_cvtt_roundpd_epi64 (s1, _MM_FROUND_NO_EXC);
+ res1 = _mm512_mask_cvtt_roundpd_epi64 (res1, m, s1, _MM_FROUND_NO_EXC);
+ res1 = _mm512_maskz_cvtt_roundpd_epi64 (m, s1, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2qq-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2qq-2.c
new file mode 100644
index 00000000000..6b338223cbc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2qq-2.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (double *s, long long *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (long long) s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_cvttpd_epi64) (s.x);
+ res2.x = INTRINSIC (_mask_cvttpd_epi64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttpd_epi64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2uqq-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2uqq-1.c
new file mode 100644
index 00000000000..a4ceec9a8e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2uqq-1.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2uqq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d s1;
+volatile __m256d s2;
+volatile __m128d s3;
+volatile __m512i res1;
+volatile __m256i res2;
+volatile __m128i res3;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ res1 = _mm512_cvttpd_epu64 (s1);
+ res2 = _mm256_cvttpd_epu64 (s2);
+ res3 = _mm_cvttpd_epu64 (s3);
+
+ res1 = _mm512_mask_cvttpd_epu64 (res1, m, s1);
+ res2 = _mm256_mask_cvttpd_epu64 (res2, m, s2);
+ res3 = _mm_mask_cvttpd_epu64 (res3, m, s3);
+
+ res1 = _mm512_maskz_cvttpd_epu64 (m, s1);
+ res2 = _mm256_maskz_cvttpd_epu64 (m, s2);
+ res3 = _mm_maskz_cvttpd_epu64 (m, s3);
+
+ res1 = _mm512_cvtt_roundpd_epu64 (s1, _MM_FROUND_NO_EXC);
+ res1 = _mm512_mask_cvtt_roundpd_epu64 (res1, m, s1, _MM_FROUND_NO_EXC);
+ res1 = _mm512_maskz_cvtt_roundpd_epu64 (m, s1, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2uqq-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2uqq-2.c
new file mode 100644
index 00000000000..39f450c9ea0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttpd2uqq-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (double *s, unsigned long long *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ r[i] = (unsigned long long) s[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvttpd_epu64) (s.x);
+ res2.x = INTRINSIC (_mask_cvttpd_epu64) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvttpd_epu64) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2qq-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2qq-1.c
new file mode 100644
index 00000000000..dd3b451f25e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2qq-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2qq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x1;
+volatile __m256i x2;
+volatile __m128i x3;
+volatile __m256 z1;
+volatile __m128 z2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x1 = _mm512_cvttps_epi64 (z1);
+ x1 = _mm512_mask_cvttps_epi64 (x1, m, z1);
+ x1 = _mm512_maskz_cvttps_epi64 (m, z1);
+ x2 = _mm256_cvttps_epi64 (z2);
+ x2 = _mm256_mask_cvttps_epi64 (x2, m, z2);
+ x2 = _mm256_maskz_cvttps_epi64 (m, z2);
+ x3 = _mm_cvttps_epi64 (z2);
+ x3 = _mm_mask_cvttps_epi64 (x3, m, z2);
+ x3 = _mm_maskz_cvttps_epi64 (m, z2);
+ x1 = _mm512_cvtt_roundps_epi64 (z1, _MM_FROUND_NO_EXC);
+ x1 = _mm512_mask_cvtt_roundps_epi64 (x1, m, z1, _MM_FROUND_NO_EXC);
+ x1 = _mm512_maskz_cvtt_roundps_epi64 (m, z1, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2qq-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2qq-2.c
new file mode 100644
index 00000000000..c56f6c97fcd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2qq-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void CALC (long long *r, float *s)  /* explicit return type: implicit int is invalid C99+ */
+{
+  int i;
+  for (i = 0; i < SIZE; i++)
+    r[i] = (long long) s[i];
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN_HALF,) src;
+ MASK_TYPE mask = MASK_VALUE;
+ long long res_ref[SIZE];
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ res2.a[i] = DEFAULT_VALUE;
+ src.a[i] = 1.5 + 34.67 * i * sign;
+ sign = sign * -1;
+ }
+
+ res1.x = INTRINSIC (_cvttps_epi64) (src.x);
+ res2.x = INTRINSIC (_mask_cvttps_epi64) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvttps_epi64) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2uqq-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2uqq-1.c
new file mode 100644
index 00000000000..9ef629b887a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2uqq-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2uqq\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x1;
+volatile __m256i x2;
+volatile __m128i x3;
+volatile __m256 z1;
+volatile __m128 z2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x1 = _mm512_cvttps_epu64 (z1);
+ x1 = _mm512_mask_cvttps_epu64 (x1, m, z1);
+ x1 = _mm512_maskz_cvttps_epu64 (m, z1);
+ x2 = _mm256_cvttps_epu64 (z2);
+ x2 = _mm256_mask_cvttps_epu64 (x2, m, z2);
+ x2 = _mm256_maskz_cvttps_epu64 (m, z2);
+ x3 = _mm_cvttps_epu64 (z2);
+ x3 = _mm_mask_cvttps_epu64 (x3, m, z2);
+ x3 = _mm_maskz_cvttps_epu64 (m, z2);
+ x1 = _mm512_cvtt_roundps_epu64 (z1, _MM_FROUND_NO_EXC);
+ x1 = _mm512_mask_cvtt_roundps_epu64 (x1, m, z1, _MM_FROUND_NO_EXC);
+ x1 = _mm512_maskz_cvtt_roundps_epu64 (m, z1, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2uqq-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2uqq-2.c
new file mode 100644
index 00000000000..6b90e7a3f0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvttps2uqq-2.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void CALC (unsigned long long *r, float *s)  /* explicit return type: implicit int is invalid C99+ */
+{
+  int i;
+  for (i = 0; i < SIZE; i++)
+    r[i] = (unsigned long long) s[i];
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
+ UNION_TYPE (AVX512F_LEN_HALF,) src;
+ MASK_TYPE mask = MASK_VALUE;
+ unsigned long long res_ref[SIZE];
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res2.a[i] = DEFAULT_VALUE;
+ src.a[i] = 1.5 + 34.67 * i;
+ }
+
+ res1.x = INTRINSIC (_cvttps_epu64) (src.x);
+ res2.x = INTRINSIC (_mask_cvttps_epu64) (res2.x, mask, src.x);
+ res3.x = INTRINSIC (_maskz_cvttps_epu64) (mask, src.x);
+
+ CALC (res_ref, src.a);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2pd-1.c
new file mode 100644
index 00000000000..55fad80c835
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2pd-1.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2pd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s1;
+volatile __m256i s2;
+volatile __m128i s3;
+volatile __m512d res1;
+volatile __m256d res2;
+volatile __m128d res3;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ res1 = _mm512_cvtepu64_pd (s1);
+ res1 = _mm512_mask_cvtepu64_pd (res1, m, s1);
+ res1 = _mm512_maskz_cvtepu64_pd (m, s1);
+ res2 = _mm256_cvtepu64_pd (s2);
+ res2 = _mm256_mask_cvtepu64_pd (res2, m, s2);
+ res2 = _mm256_maskz_cvtepu64_pd (m, s2);
+ res3 = _mm_cvtepu64_pd (s3);
+ res3 = _mm_mask_cvtepu64_pd (res3, m, s3);
+ res3 = _mm_maskz_cvtepu64_pd (m, s3);
+ res1 = _mm512_cvt_roundepu64_pd (s1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ res1 = _mm512_mask_cvt_roundepu64_pd (res1, m, s1, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+ res1 = _mm512_maskz_cvt_roundepu64_pd (m, s1, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2pd-2.c
new file mode 100644
index 00000000000..907e1d9877b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2pd-2.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (unsigned long long *s, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = (double) s[i];
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s;
+ UNION_TYPE (AVX512F_LEN, d) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123 * (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepu64_pd) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepu64_pd) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepu64_pd) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2ps-1.c
new file mode 100644
index 00000000000..4931bb322db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2ps-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtuqq2psx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2psx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2psx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2psy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2psy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2psy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2ps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2ps\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2ps\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtuqq2ps\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\n\]*%zmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i s1;
+volatile __m256i s2;
+volatile __m128i s3;
+volatile __m256 res1;
+volatile __m128 res2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ res1 = _mm512_cvtepu64_ps (s1);
+ res1 = _mm512_mask_cvtepu64_ps (res1, m, s1);
+ res1 = _mm512_maskz_cvtepu64_ps (m, s1);
+ res2 = _mm256_cvtepu64_ps (s2);
+ res2 = _mm256_mask_cvtepu64_ps (res2, m, s2);
+ res2 = _mm256_maskz_cvtepu64_ps (m, s2);
+ res2 = _mm_cvtepu64_ps (s3);
+ res2 = _mm_mask_cvtepu64_ps (res2, m, s3);
+ res2 = _mm_maskz_cvtepu64_ps (m, s3);
+ res1 = _mm512_cvt_roundepu64_ps (s1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ res1 = _mm512_mask_cvt_roundepu64_ps (res1, m, s1, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
+ res1 = _mm512_maskz_cvt_roundepu64_ps (m, s1, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2ps-2.c
new file mode 100644
index 00000000000..794024f6ced
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vcvtuqq2ps-2.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#define SIZE_HALF (AVX512F_LEN_HALF / 32)
+#include "avx512f-mask-type.h"
+
+void
+CALC (unsigned long long *s, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE_HALF; i++)
+ r[i] = (i < SIZE) ? (float) s[i] : 0;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s;
+ UNION_TYPE (AVX512F_LEN_HALF,) res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE_HALF];
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123 * (i + 2000);
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_cvtepu64_ps) (s.x);
+ res2.x = INTRINSIC (_mask_cvtepu64_ps) (res2.x, mask, s.x);
+ res3.x = INTRINSIC (_maskz_cvtepu64_ps) (mask, s.x);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN_HALF,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO ()(res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN_HALF,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vextractf32x8-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vextractf32x8-1.c
new file mode 100644
index 00000000000..03a4f2c145f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vextractf32x8-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vextractf32x8\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vextractf32x8\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vextractf32x8\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m256 y;
+
+void extern
+avx512dq_test (void)
+{
+ y = _mm512_extractf32x8_ps (x, 1);
+ y = _mm512_mask_extractf32x8_ps (y, 2, x, 1);
+ y = _mm512_maskz_extractf32x8_ps (2, x, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vextractf64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vextractf64x2-1.c
new file mode 100644
index 00000000000..ddd52c63754
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vextractf64x2-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vextractf64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vextractf64x2\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vextractf64x2\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vextractf64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x1;
+volatile __m256d x2;
+volatile __m128d y;
+
+void extern
+avx512dq_test (void)
+{
+ y = _mm512_extractf64x2_pd (x1, 3);
+ y = _mm512_mask_extractf64x2_pd (y, 2, x1, 3);
+ y = _mm512_maskz_extractf64x2_pd (2, x1, 3);
+ y = _mm256_extractf64x2_pd (x2, 1);
+ y = _mm256_mask_extractf64x2_pd (y, 2, x2, 1);
+ y = _mm256_maskz_extractf64x2_pd (2, x2, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vextractf64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vextractf64x2-2.c
new file mode 100644
index 00000000000..02a2543cbb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vextractf64x2-2.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void
+CALC (double *s1, double *res_ref, int mask)
+{
+ memset (res_ref, 0, 16);
+ memcpy (res_ref, s1 + mask * 2, 16);
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1;
+ union128d res1, res2, res3;
+ double res_ref[2];
+ MASK_TYPE mask = MASK_VALUE;
+ int j;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j / 4.56;
+ }
+
+ for (j = 0; j < 2; j++)
+ {
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_extractf64x2_pd) (s1.x, 1);
+ res2.x = INTRINSIC (_mask_extractf64x2_pd) (res2.x, mask, s1.x, 1);
+ res3.x = INTRINSIC (_maskz_extractf64x2_pd) (mask, s1.x, 1);
+ CALC (s1.a, res_ref, 1);
+
+ if (check_union128d (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, 2);
+ if (check_union128d (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, 2);
+ if (check_union128d (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vextracti32x8-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vextracti32x8-1.c
new file mode 100644
index 00000000000..c1f66bc897b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vextracti32x8-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vextracti32x8\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vextracti32x8\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vextracti32x8\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+
+void extern
+avx512dq_test (void)
+{
+ y = _mm512_extracti32x8_epi32 (x, 1);
+ y = _mm512_mask_extracti32x8_epi32 (y, 2, x, 1);
+ y = _mm512_maskz_extracti32x8_epi32 (2, x, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vextracti64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vextracti64x2-1.c
new file mode 100644
index 00000000000..9852d8b2b66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vextracti64x2-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vextracti64x2\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vextracti64x2\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vextracti64x2\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vextracti64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x1;
+volatile __m256i x2;
+volatile __m128i y;
+
+void extern
+avx512dq_test (void)
+{
+ y = _mm512_extracti64x2_epi64 (x1, 3);
+ y = _mm512_mask_extracti64x2_epi64 (y, 2, x1, 3);
+ y = _mm512_maskz_extracti64x2_epi64 (2, x1, 3);
+ y = _mm256_extracti64x2_epi64 (x2, 1);
+ y = _mm256_mask_extracti64x2_epi64 (y, 2, x2, 1);
+ y = _mm256_maskz_extracti64x2_epi64 (2, x2, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vextracti64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vextracti64x2-2.c
new file mode 100644
index 00000000000..95fb0cf6a2c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vextracti64x2-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#include "string.h"
+
+void
+CALC (long long int *s1, long long int *res_ref, int mask)
+{
+ memset (res_ref, 0, 16);
+ memcpy (res_ref, s1 + mask * 2, 16);
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1;
+ union128i_q res1, res2, res3;
+ long long int res_ref[2];
+ MASK_TYPE mask = MASK_VALUE;
+ int j;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j + 37;
+ }
+
+ for (j = 0; j < 2; j++)
+ {
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_extracti64x2_epi64) (s1.x, 1);
+ res2.x =
+ INTRINSIC (_mask_extracti64x2_epi64) (res2.x, mask, s1.x, 1);
+ res3.x = INTRINSIC (_maskz_extracti64x2_epi64) (mask, s1.x, 1);
+ CALC (s1.a, res_ref, 1);
+
+ if (check_union128i_q (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, 2);
+ if (check_union128i_q (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, 2);
+ if (check_union128i_q (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasspd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasspd-1.c
new file mode 100644
index 00000000000..00855bde878
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasspd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfpclasspd\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*%k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclasspd\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n^k\]*%k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclasspd\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n^k\]*%k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclasspd\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*%k\[1-7\]\{" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclasspd\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n^k\]*%k\[1-7\]\{" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclasspd\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n^k\]*%k\[1-7\]\{" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d x512;
+volatile __m256d x256;
+volatile __m128d x128;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ m = _mm512_fpclass_pd_mask (x512, 13);
+ m = _mm256_fpclass_pd_mask (x256, 13);
+ m = _mm_fpclass_pd_mask (x128, 13);
+ m = _mm512_mask_fpclass_pd_mask (2, x512, 13);
+ m = _mm256_mask_fpclass_pd_mask (2, x256, 13);
+ m = _mm_mask_fpclass_pd_mask (2, x128, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasspd-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasspd-2.c
new file mode 100644
index 00000000000..c6610fded40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasspd-2.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#include <values.h>
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#ifndef __FPCLASSPD__
+#define __FPCLASSPD__
+int check_fp_class_dp (double src, int imm)
+{
+ int qNaN_res = isnan (src);
+ int sNaN_res = isnan (src);
+ int Pzero_res = (src == 0.0);
+ int Nzero_res = (src == -0.0);
+ int PInf_res = (isinf (src) == 1);
+ int NInf_res = (isinf (src) == -1);
+ int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
+ int FinNeg_res = finite (src) && (src < 0);
+
+ int result = (((imm & 1) && qNaN_res)
+ || (((imm >> 1) & 1) && Pzero_res)
+ || (((imm >> 2) & 1) && Nzero_res)
+ || (((imm >> 3) & 1) && PInf_res)
+ || (((imm >> 4) & 1) && NInf_res)
+ || (((imm >> 5) & 1) && Denorm_res)
+ || (((imm >> 6) & 1) && FinNeg_res)
+ || (((imm >> 7) & 1) && sNaN_res));
+ return result;
+}
+#endif
+
+__mmask8 CALC (double *s1, int imm)  /* returns the computed mask; was implicit int */
+{
+  int i;
+  __mmask8 res = 0;
+
+  for (i = 0; i < SIZE; i++)
+    if (check_fp_class_dp(s1[i], imm))
+      res = res | (1 << i);
+
+  return res;
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, d) src;
+ __mmask8 res1, res2, res_ref = 0;
+ MASK_TYPE mask = MASK_VALUE;
+
+ src.a[0] = NAN;
+ src.a[1] = 1.0 / 0.0;
+ for (i = 2; i < SIZE; i++)
+ {
+ src.a[i] = -24.43 + 0.6 * i;
+ }
+
+ res1 = INTRINSIC (_fpclass_pd_mask) (src.x, 0xFF);
+ res2 = INTRINSIC (_mask_fpclass_pd_mask) (mask, src.x, 0xFF);
+
+ res_ref = CALC (src.a, 0xFF);
+
+ if (res_ref != res1)
+ abort ();
+
+ if ((res_ref & mask) != res2)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassps-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassps-1.c
new file mode 100644
index 00000000000..b6da5e72770
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassps-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfpclassps\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*%k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclassps\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n^k\]*%k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclassps\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n^k\]*%k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclassps\[ \\t\]+\[^\n\]*%zmm\[0-7\]\[^\n^k\]*%k\[1-7\]\{" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclassps\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n^k\]*%k\[1-7\]\{" 1 } } */
+/* { dg-final { scan-assembler-times "vfpclassps\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n^k\]*%k\[1-7\]\{" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x512;
+volatile __m256 x256;
+volatile __m128 x128;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx512dq_test (void)
+{
+ m16 = _mm512_fpclass_ps_mask (x512, 13);
+ m8 = _mm256_fpclass_ps_mask (x256, 13);
+ m8 = _mm_fpclass_ps_mask (x128, 13);
+ m16 = _mm512_mask_fpclass_ps_mask (2, x512, 13);
+ m8 = _mm256_mask_fpclass_ps_mask (2, x256, 13);
+ m8 = _mm_mask_fpclass_ps_mask (2, x128, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassps-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassps-2.c
new file mode 100644
index 00000000000..8aba38d4cfe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassps-2.c
@@ -0,0 +1,75 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#include <math.h>
+#include <values.h>
+#include "avx512f-mask-type.h"
+#define SIZE (AVX512F_LEN / 32)
+
+#ifndef __FPCLASSPD__
+#define __FPCLASSPD__
+int check_fp_class_sp (float src, int imm)
+{
+ int qNaN_res = isnan (src);
+ int sNaN_res = isnan (src);
+ int Pzero_res = (src == 0.0);
+ int Nzero_res = (src == -0.0);
+ int PInf_res = (isinf (src) == 1);
+ int NInf_res = (isinf (src) == -1);
+ int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
+ int FinNeg_res = finite (src) && (src < 0);
+
+ int result = (((imm & 1) && qNaN_res)
+ || (((imm >> 1) & 1) && Pzero_res)
+ || (((imm >> 2) & 1) && Nzero_res)
+ || (((imm >> 3) & 1) && PInf_res)
+ || (((imm >> 4) & 1) && NInf_res)
+ || (((imm >> 5) & 1) && Denorm_res)
+ || (((imm >> 6) & 1) && FinNeg_res)
+ || (((imm >> 7) & 1) && sNaN_res));
+ return result;
+}
+#endif
+
+CALC (float *s1, int imm)
+{
+ int i;
+ MASK_TYPE res = 0;
+
+ for (i = 0; i < SIZE; i++)
+ if (check_fp_class_sp(s1[i], imm))
+ res = res | (1 << i);
+
+ return res;
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN,) src;
+ MASK_TYPE res1, res2, res_ref = 0;
+ MASK_TYPE mask = MASK_VALUE;
+
+ src.a[0] = NAN;
+ src.a[1] = 1.0 / 0.0;
+ for (i = 2; i < SIZE; i++)
+ {
+ src.a[i] = -24.43 + 0.6 * i;
+ }
+
+ res1 = INTRINSIC (_fpclass_ps_mask) (src.x, 0xFF);
+ res2 = INTRINSIC (_mask_fpclass_ps_mask) (mask, src.x, 0xFF);
+
+
+ res_ref = CALC (src.a, 0xFF);
+
+ if (res_ref != res1)
+ abort ();
+
+ if ((mask & res_ref) != res2)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c
new file mode 100644
index 00000000000..c1b5caab991
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler "vfpclasssd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128d x128;
+volatile __mmask8 m8;
+
+void extern
+avx512dq_test (void)
+{
+ m8 = _mm_fpclass_sd_mask (x128, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c
new file mode 100644
index 00000000000..2f4756ea279
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler "vfpclassss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128 x128;
+volatile __mmask8 m8;
+
+void extern
+avx512dq_test (void)
+{
+ m8 = _mm_fpclass_ss_mask (x128, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vinsertf32x8-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vinsertf32x8-1.c
new file mode 100644
index 00000000000..26f36c49c27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vinsertf32x8-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vinsertf32x8\[ \\t\]+\[^\n\]*\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertf32x8\[ \\t\]+\[^\n\]*\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertf32x8\[ \\t\]+\[^\n\]*" 3 } } */
+
+#include <immintrin.h>
+
+volatile __m512 x;
+volatile __m256 y;
+
+void extern
+avx512dq_test (void)
+{
+ x = _mm512_insertf32x8 (x, y, 1);
+ x = _mm512_mask_insertf32x8 (x, 2, x, y, 1);
+ x = _mm512_maskz_insertf32x8 (2, x, y, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vinsertf64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vinsertf64x2-1.c
new file mode 100644
index 00000000000..8476a36b2c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vinsertf64x2-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vinsertf64x2\[^\n\]*ymm" 3 } } */
+/* { dg-final { scan-assembler-times "vinsertf64x2\[^\n\]*\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vinsertf64x2\[^\n\]*\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vinsertf64x2\[^\n\]*zmm" 3 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x;
+volatile __m128d y;
+volatile __m512d z;
+
+void extern
+avx512dq_test (void)
+{
+ x = _mm256_insertf64x2 (x, y, 1);
+ x = _mm256_mask_insertf64x2 (x, 2, x, y, 1);
+ x = _mm256_maskz_insertf64x2 (2, x, y, 1);
+ z = _mm512_insertf64x2 (z, y, 1);
+ z = _mm512_mask_insertf64x2 (z, 2, z, y, 1);
+ z = _mm512_maskz_insertf64x2 (2, z, y, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vinsertf64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vinsertf64x2-2.c
new file mode 100644
index 00000000000..00c1c8018db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vinsertf64x2-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+#include "string.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (UNION_TYPE (AVX512F_LEN, d) s1, union128d s2,
+ double *res_ref, int mask)
+{
+ memcpy (res_ref, s1.a, SIZE * sizeof (double));
+ memcpy (res_ref + mask * 2, s2.a, 16);
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, res1, res2, res3;
+ union128d s2;
+ double res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int j;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j;
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ for (j = 0; j < 2; j++)
+ s2.a[j] = j * j * j;
+
+ res1.x = INTRINSIC (_insertf64x2) (s1.x, s2.x, 1);
+ res2.x = INTRINSIC (_mask_insertf64x2) (res2.x, mask, s1.x, s2.x, 1);
+ res3.x = INTRINSIC (_maskz_insertf64x2) (mask, s1.x, s2.x, 1);
+
+ CALC (s1, s2, res_ref, 1);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vinserti32x8-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vinserti32x8-1.c
new file mode 100644
index 00000000000..9bebd6836b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vinserti32x8-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vinserti32x8\[ \\t\]+\[^\n\]*" 3 } } */
+/* { dg-final { scan-assembler-times "vinserti32x8\[ \\t\]+\[^\n\]*\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vinserti32x8\[ \\t\]+\[^\n\]*\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+volatile __m256i y;
+
+void extern
+avx512dq_test (void)
+{
+ x = _mm512_inserti32x8 (x, y, 1);
+ x = _mm512_mask_inserti32x8 (x, 2, x, y, 1);
+ x = _mm512_maskz_inserti32x8 (2, x, y, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vinserti64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vinserti64x2-1.c
new file mode 100644
index 00000000000..22d8f1132ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vinserti64x2-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vinserti64x2\[^\n\]*ymm" 3 } } */
+/* { dg-final { scan-assembler-times "vinserti64x2\[^\n\]*\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vinserti64x2\[^\n\]*\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vinserti64x2\[^\n\]*zmm" 3 } } */
+
+#include <immintrin.h>
+
+volatile __m512i z;
+volatile __m256i x;
+volatile __m128i y;
+
+void extern
+avx512dq_test (void)
+{
+ x = _mm256_inserti64x2 (x, y, 1);
+ x = _mm256_mask_inserti64x2 (x, 2, x, y, 1);
+ x = _mm256_maskz_inserti64x2 (2, x, y, 1);
+ z = _mm512_inserti64x2 (z, y, 0);
+ z = _mm512_mask_inserti64x2 (z, 2, z, y, 0);
+ z = _mm512_maskz_inserti64x2 (2, z, y, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vinserti64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vinserti64x2-2.c
new file mode 100644
index 00000000000..63ed54ea0b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vinserti64x2-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+#include "string.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (UNION_TYPE (AVX512F_LEN, i_q) s1, union128i_q s2,
+ long long *res_ref, int mask)
+{
+ memcpy (res_ref, s1.a, SIZE * sizeof (long long));
+ memcpy (res_ref + mask * 2, s2.a, 16);
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) s1, res1, res2, res3;
+ union128i_q s2;
+ long long res_ref[SIZE];
+ MASK_TYPE mask = MASK_VALUE;
+ int j;
+
+ for (j = 0; j < SIZE; j++)
+ {
+ s1.a[j] = j * j;
+ res1.a[j] = DEFAULT_VALUE;
+ res2.a[j] = DEFAULT_VALUE;
+ res3.a[j] = DEFAULT_VALUE;
+ }
+
+ for (j = 0; j < 2; j++)
+ s2.a[j] = j * j * j;
+
+ res1.x = INTRINSIC (_inserti64x2) (s1.x, s2.x, 1);
+ res2.x = INTRINSIC (_mask_inserti64x2) (res2.x, mask, s1.x, s2.x, 1);
+ res3.x = INTRINSIC (_maskz_inserti64x2) (mask, s1.x, s2.x, 1);
+
+ CALC (s1, s2, res_ref, 1);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vorpd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vorpd-1.c
new file mode 100644
index 00000000000..97383c1af69
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vorpd-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vorpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vorpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vorpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vorpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vorpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vorpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vorpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vorpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vorpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d z;
+volatile __m256d y;
+volatile __m128d x;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ z = _mm512_or_pd (z, z);
+ z = _mm512_mask_or_pd (z, m, z, z);
+ z = _mm512_maskz_or_pd (m, z, z);
+
+ y = _mm256_mask_or_pd (y, m, y, y);
+ y = _mm256_maskz_or_pd (m, y, y);
+
+ x = _mm_mask_or_pd (x, m, x, x);
+ x = _mm_maskz_or_pd (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vorpd-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vorpd-2.c
new file mode 100644
index 00000000000..c5ef0309ebe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vorpd-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (double *src1, double *src2, double *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ long long tmp = (*(long long *) &src1[i]) | (*(long long *) &src2[i]);
+ dst[i] = *(double *) &tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,d) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double dst_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++) {
+ s1.a[i] = 132.45 * i;
+ s2.a[i] = 43.6 - i * 4.4;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_or_pd) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_or_pd) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_or_pd) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN,d) (res1, dst_ref))
+ abort ();
+
+ MASK_MERGE (d) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,d) (res2, dst_ref))
+ abort ();
+
+ MASK_ZERO (d) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,d) (res3, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vorps-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vorps-1.c
new file mode 100644
index 00000000000..7f042868431
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vorps-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vorps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vorps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vorps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vorps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vorps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vorps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vorps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vorps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vorps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 z;
+volatile __m256 y;
+volatile __m128 x;
+volatile __mmask8 n;
+volatile __mmask16 m;
+
+void extern
+avx512dq_test (void)
+{
+ z = _mm512_or_ps (z, z);
+ z = _mm512_mask_or_ps (z, m, z, z);
+ z = _mm512_maskz_or_ps (m, z, z);
+
+ y = _mm256_mask_or_ps (y, n, y, y);
+ y = _mm256_maskz_or_ps (n, y, y);
+
+ x = _mm_mask_or_ps (x, n, x, x);
+ x = _mm_maskz_or_ps (n, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vorps-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vorps-2.c
new file mode 100644
index 00000000000..87d8e02a524
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vorps-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void
+CALC (float *src1, float *src2, float *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ int tmp = (*(int *) &src1[i]) | (*(int *) &src2[i]);
+ dst[i] = *(float *) &tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float dst_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++) {
+ s1.a[i] = 132.45 * i;
+ s2.a[i] = 43.6 - i * 4.4;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_or_ps) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_or_ps) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_or_ps) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, dst_ref))
+ abort ();
+
+ MASK_MERGE () (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, dst_ref))
+ abort ();
+
+ MASK_ZERO () (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vpmovd2m-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovd2m-1.c
new file mode 100644
index 00000000000..c76bdec47e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovd2m-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpmovd2m\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+/* { dg-final { scan-assembler "vpmovd2m\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+/* { dg-final { scan-assembler "vpmovd2m\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m8;
+volatile __mmask16 m16;
+
+void extern
+avx512dq_test (void)
+{
+ m8 = _mm_movepi32_mask (x128);
+ m8 = _mm256_movepi32_mask (x256);
+ m16 = _mm512_movepi32_mask (x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vpmovd2m-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovd2m-2.c
new file mode 100644
index 00000000000..9693fb6f2af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovd2m-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *r, int *s1)
+{
+ int i;
+ MASK_TYPE res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] >> 31)
+ res = res | (one << i);
+
+ *r = res;
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_d) src;
+ MASK_TYPE res, res_ref = 0;
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 2 * i * sign;
+ sign = sign * -1;
+ }
+
+ res = INTRINSIC (_movepi32_mask) (src.x);
+
+ CALC (&res_ref, src.a);
+
+ if (res_ref != res)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2d-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2d-1.c
new file mode 100644
index 00000000000..5afd552492d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2d-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpmovm2d\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%xmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vpmovm2d\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%ymm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vpmovm2d\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx512dq_test (void)
+{
+ x128 = _mm_movm_epi32 (m8);
+ x256 = _mm256_movm_epi32 (m8);
+ x512 = _mm512_movm_epi32 (m16);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2d-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2d-2.c
new file mode 100644
index 00000000000..9ec250ffc98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2d-2.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+CALC (int *r, MASK_TYPE s)
+{
+ int i;
+ int all_ones = 0xffffffff;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = ((s >> i) & 1) ? all_ones : 0;
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_d) res, res_ref;
+ MASK_TYPE src = (MASK_TYPE) 0x1111;
+
+ res.x = INTRINSIC (_movm_epi32) (src);
+
+ CALC (res_ref.a, src);
+
+ if (UNION_CHECK (AVX512F_LEN, i_d) (res, res_ref.a))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2q-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2q-1.c
new file mode 100644
index 00000000000..a71599e5560
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2q-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpmovm2q\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%xmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vpmovm2q\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%ymm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vpmovm2q\[ \\t\]+\[^\n\]*%k\[1-7\]\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m8;
+
+void extern
+avx512dq_test (void)
+{
+ x128 = _mm_movm_epi64 (m8);
+ x256 = _mm256_movm_epi64 (m8);
+ x512 = _mm512_movm_epi64 (m8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2q-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2q-2.c
new file mode 100644
index 00000000000..1a1c187e484
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovm2q-2.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (long long *r, MASK_TYPE s)
+{
+ int i;
+ long long all_ones = 0xffffffffffffffff;
+
+ for (i = 0; i < SIZE; i++)
+ r[i] = ((s >> i) & 1) ? all_ones : 0;
+}
+
+void
+TEST (void)
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, i_q) res, res_ref;
+ MASK_TYPE src = (MASK_TYPE) 0xff;
+
+ res.x = INTRINSIC (_movm_epi64) (src);
+
+ CALC (res_ref.a, src);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (res, res_ref.a))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vpmovq2m-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovq2m-1.c
new file mode 100644
index 00000000000..8ce3694da9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovq2m-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpmovq2m\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+/* { dg-final { scan-assembler "vpmovq2m\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+/* { dg-final { scan-assembler "vpmovq2m\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%k\[1-7\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x512;
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m8;
+
+void extern
+avx512dq_test (void)
+{
+ m8 = _mm_movepi64_mask (x128);
+ m8 = _mm256_movepi64_mask (x256);
+ m8 = _mm512_movepi64_mask (x512);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vpmovq2m-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovq2m-2.c
new file mode 100644
index 00000000000..24b3c552ac2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vpmovq2m-2.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+CALC (MASK_TYPE *r, long long *s1)
+{
+ int i;
+ MASK_TYPE res = 0;
+ MASK_TYPE one = 1;
+
+ for (i = 0; i < SIZE; i++)
+ if (s1[i] >> 63)
+ res = res | (one << i);
+
+ *r = res;
+}
+
+void
+TEST (void)
+{
+ int i, sign;
+ UNION_TYPE (AVX512F_LEN, i_q) src;
+ MASK_TYPE res, res_ref = 0;
+
+ sign = -1;
+ for (i = 0; i < SIZE; i++)
+ {
+ src.a[i] = 2 * i * sign;
+ sign = sign * -1;
+ }
+
+ res = INTRINSIC (_movepi64_mask) (src.x);
+
+ CALC (&res_ref, src.a);
+
+ if (res_ref != res)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-1.c
new file mode 100644
index 00000000000..25124ebc828
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmullq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i _x1, _y1, _z1;
+volatile __m256i _x2, _y2, _z2;
+volatile __m128i _x3, _y3, _z3;
+
+void extern
+avx512dq_test (void)
+{
+ _x3 = _mm_mullo_epi64 (_y3, _z3);
+ _x3 = _mm_mask_mullo_epi64 (_x3, 2, _y3, _z3);
+ _x3 = _mm_maskz_mullo_epi64 (2, _y3, _z3);
+ _x2 = _mm256_mullo_epi64 (_y2, _z2);
+ _x2 = _mm256_mask_mullo_epi64 (_x2, 3, _y2, _z2);
+ _x2 = _mm256_maskz_mullo_epi64 (3, _y2, _z2);
+ _x1 = _mm512_mullo_epi64 (_y1, _z1);
+ _x1 = _mm512_mask_mullo_epi64 (_x1, 3, _y1, _z1);
+ _x1 = _mm512_maskz_mullo_epi64 (3, _y1, _z1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-2.c
new file mode 100644
index 00000000000..2184834d807
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vpmullq-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (long long *src1, long long *src2, long long *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ dst[i] = src1[i] * src2[i];
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2, dst1, dst2, dst3;
+ long long dst_ref[SIZE];
+ int i;
+ MASK_TYPE mask = MASK_VALUE;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ src1.a[i] = i + 50;
+ src2.a[i] = i + 100;
+ dst2.a[i] = DEFAULT_VALUE;
+ }
+
+ dst1.x = INTRINSIC (_mullo_epi64) (src1.x, src2.x);
+ dst2.x = INTRINSIC (_mask_mullo_epi64) (dst2.x, mask, src1.x, src2.x);
+ dst3.x = INTRINSIC (_maskz_mullo_epi64) (mask, src1.x, src2.x);
+ CALC (src1.a, src2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, i_q) (dst1, dst_ref))
+ abort ();
+
+ MASK_MERGE (i_q) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (dst2, dst_ref))
+ abort ();
+
+ MASK_ZERO (i_q) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_q) (dst3, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vrangepd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vrangepd-1.c
new file mode 100644
index 00000000000..45f7b27dcc8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vrangepd-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vrangepd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d z;
+volatile __m256d y;
+volatile __m128d x;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ z = _mm512_range_round_pd (z, z, 15, _MM_FROUND_NO_EXC);
+ z = _mm512_range_pd (z, z, 15);
+ y = _mm256_range_pd (y, y, 15);
+ x = _mm_range_pd (x, x, 15);
+
+ z = _mm512_mask_range_round_pd (z, m, z, z, 15, _MM_FROUND_NO_EXC);
+ z = _mm512_mask_range_pd (z, m, z, z, 15);
+ y = _mm256_mask_range_pd (y, m, y, y, 15);
+ x = _mm_mask_range_pd (x, m, x, x, 15);
+
+ z = _mm512_maskz_range_round_pd (m, z, z, 15, _MM_FROUND_NO_EXC);
+ z = _mm512_maskz_range_pd (m, z, z, 15);
+ y = _mm256_maskz_range_pd (m, y, y, 15);
+ x = _mm_maskz_range_pd (m, x, x, 15);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vrangepd-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vrangepd-2.c
new file mode 100644
index 00000000000..fc032fbc507
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vrangepd-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+#define IMM 0x02
+
+void
+CALC (double *s1, double *s2, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ if (s1[i] < -s2[i])
+ r[i] = -s2[i];
+ else if (s1[i] > s2[i])
+ r[i] = s2[i];
+ else
+ r[i] = s1[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 234.567 * i * sign;
+ s2.a[i] = 100 * (i + 1);
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_range_pd) (s1.x, s2.x, IMM);
+ res2.x = INTRINSIC (_mask_range_pd) (res2.x, mask, s1.x, s2.x, IMM);
+ res3.x = INTRINSIC (_maskz_range_pd) (mask, s1.x, s2.x, IMM);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vrangeps-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vrangeps-1.c
new file mode 100644
index 00000000000..71e259ff8c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vrangeps-1.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 6 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vrangeps\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 z;
+volatile __m256 y;
+volatile __m128 x;
+volatile __mmask8 m8;
+volatile __mmask16 m16;
+
+void extern
+avx512dq_test (void)
+{
+ z = _mm512_range_round_ps (z, z, 15, _MM_FROUND_NO_EXC);
+ z = _mm512_range_ps (z, z, 15);
+ y = _mm256_range_ps (y, y, 15);
+ x = _mm_range_ps (x, x, 15);
+
+ z = _mm512_mask_range_round_ps (z, m16, z, z, 15, _MM_FROUND_NO_EXC);
+ z = _mm512_mask_range_ps (z, m16, z, z, 15);
+ y = _mm256_mask_range_ps (y, m8, y, y, 15);
+ x = _mm_mask_range_ps (x, m8, x, x, 15);
+
+ z = _mm512_maskz_range_round_ps (m16, z, z, 15, _MM_FROUND_NO_EXC);
+ z = _mm512_maskz_range_ps (m16, z, z, 15);
+ y = _mm256_maskz_range_ps (m8, y, y, 15);
+ x = _mm_maskz_range_ps (m8, x, x, 15);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vrangeps-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vrangeps-2.c
new file mode 100644
index 00000000000..373260abd79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vrangeps-2.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+#define IMM 0x02
+
+void
+CALC (float *s1, float *s2, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ if (s1[i] < -s2[i])
+ r[i] = -s2[i];
+ else if (s1[i] > s2[i])
+ r[i] = s2[i];
+ else
+ r[i] = s1[i];
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, ) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s1.a[i] = 234.567 * i * sign;
+ s2.a[i] = 100 * (i + 1);
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_range_ps) (s1.x, s2.x, IMM);
+ res2.x = INTRINSIC (_mask_range_ps) (res2.x, mask, s1.x, s2.x, IMM);
+ res3.x = INTRINSIC (_maskz_range_ps) (mask, s1.x, s2.x, IMM);
+
+ CALC (s1.a, s2.a, res_ref);
+
+ if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, ) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vrangesd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vrangesd-1.c
new file mode 100644
index 00000000000..21f48113c1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vrangesd-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vrangesd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vrangesd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x1 = _mm_range_sd (x1, x2, 3);
+ x1 = _mm_range_round_sd (x1, x2, 3, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vrangess-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vrangess-1.c
new file mode 100644
index 00000000000..0f5e750b676
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vrangess-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vrangess\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrangess\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x1 = _mm_range_ss (x1, x2, 1);
+ x1 = _mm_range_round_ss (x1, x2, 1, _MM_FROUND_NO_EXC);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducepd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducepd-1.c
new file mode 100644
index 00000000000..ce70cd7152a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducepd-1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vreducepd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vreducepd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vreducepd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vreducepd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vreducepd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vreducepd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vreducepd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vreducepd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vreducepd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+#define IMM 123
+
+volatile __m512d x1;
+volatile __m256d x2;
+volatile __m128d x3;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x1 = _mm512_reduce_pd (x1, IMM);
+ x2 = _mm256_reduce_pd (x2, IMM);
+ x3 = _mm_reduce_pd (x3, IMM);
+
+ x1 = _mm512_mask_reduce_pd (x1, m, x1, IMM);
+ x2 = _mm256_mask_reduce_pd (x2, m, x2, IMM);
+ x3 = _mm_mask_reduce_pd (x3, m, x3, IMM);
+
+ x1 = _mm512_maskz_reduce_pd (m, x1, IMM);
+ x2 = _mm256_maskz_reduce_pd (m, x2, IMM);
+ x3 = _mm_maskz_reduce_pd (m, x3, IMM);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducepd-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducepd-2.c
new file mode 100644
index 00000000000..3e231ab9d2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducepd-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+#define IMM 0x23
+
+void
+CALC (double *s, double *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ double tmp = (int) (4 * s[i]) / 4.0;
+ r[i] = s[i] - tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, d) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_reduce_pd) (s.x, IMM);
+ res2.x = INTRINSIC (_mask_reduce_pd) (res2.x, mask, s.x, IMM);
+ res3.x = INTRINSIC (_maskz_reduce_pd) (mask, s.x, IMM);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_FP_CHECK (AVX512F_LEN, d) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (d) (res_ref, mask, SIZE);
+ if (UNION_FP_CHECK (AVX512F_LEN, d) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (d) (res_ref, mask, SIZE);
+ if (UNION_FP_CHECK (AVX512F_LEN, d) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreduceps-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreduceps-1.c
new file mode 100644
index 00000000000..cdc3fb963ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreduceps-1.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vreduceps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vreduceps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vreduceps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vreduceps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vreduceps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vreduceps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vreduceps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vreduceps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vreduceps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+#define IMM 123
+
+volatile __m512 x1;
+volatile __m256 x2;
+volatile __m128 x3;
+volatile __mmask16 m16;
+volatile __mmask8 m8;
+
+void extern
+avx512dq_test (void)
+{
+ x1 = _mm512_reduce_ps (x1, IMM);
+ x2 = _mm256_reduce_ps (x2, IMM);
+ x3 = _mm_reduce_ps (x3, IMM);
+
+ x1 = _mm512_mask_reduce_ps (x1, m16, x1, IMM);
+ x2 = _mm256_mask_reduce_ps (x2, m8, x2, IMM);
+ x3 = _mm_mask_reduce_ps (x3, m8, x3, IMM);
+
+ x1 = _mm512_maskz_reduce_ps (m16, x1, IMM);
+ x2 = _mm256_maskz_reduce_ps (m8, x2, IMM);
+ x3 = _mm_maskz_reduce_ps (m8, x3, IMM);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreduceps-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreduceps-2.c
new file mode 100644
index 00000000000..97afd2ad93e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreduceps-2.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+#define IMM 0x23
+
+void
+CALC (float *s, float *r)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ float tmp = (int) (4 * s[i]) / 4.0;
+ r[i] = s[i] - tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float res_ref[SIZE];
+ int i, sign = 1;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ s.a[i] = 123.456 * (i + 2000) * sign;
+ res2.a[i] = DEFAULT_VALUE;
+ sign = -sign;
+ }
+
+ res1.x = INTRINSIC (_reduce_ps) (s.x, IMM);
+ res2.x = INTRINSIC (_mask_reduce_ps) (res2.x, mask, s.x, IMM);
+ res3.x = INTRINSIC (_maskz_reduce_ps) (mask, s.x, IMM);
+
+ CALC (s.a, res_ref);
+
+ if (UNION_FP_CHECK (AVX512F_LEN,) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE () (res_ref, mask, SIZE);
+ if (UNION_FP_CHECK (AVX512F_LEN,) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO () (res_ref, mask, SIZE);
+ if (UNION_FP_CHECK (AVX512F_LEN,) (res3, res_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1.c
new file mode 100644
index 00000000000..f6a4a283098
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducesd-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vreducesd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x1 = _mm_reduce_sd (x1, x2, 123);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1.c
new file mode 100644
index 00000000000..0d51b6119df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vreducess-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vreducess\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x1, x2;
+volatile __mmask8 m;
+
+void extern
+avx512dq_test (void)
+{
+ x1 = _mm_reduce_ss (x1, x2, 123);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vxorpd-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vxorpd-1.c
new file mode 100644
index 00000000000..42ea18d592e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vxorpd-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vxorpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vxorpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vxorpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vxorpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vxorpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vxorpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vxorpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vxorpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vxorpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512d z;
+volatile __m256d y;
+volatile __m128d x;
+volatile __mmask8 m;
+
+
+void extern
+avx512dq_test (void)
+{
+ z = _mm512_xor_pd (z, z);
+ z = _mm512_mask_xor_pd (z, m, z, z);
+ z = _mm512_maskz_xor_pd (m, z, z);
+
+ y = _mm256_mask_xor_pd (y, m, y, y);
+ y = _mm256_maskz_xor_pd (m, y, y);
+
+ x = _mm_mask_xor_pd (x, m, x, x);
+ x = _mm_maskz_xor_pd (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vxorpd-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vxorpd-2.c
new file mode 100644
index 00000000000..060861a9264
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vxorpd-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+void
+CALC (double *src1, double *src2, double *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ long long tmp = (*(long long *) &src1[i]) ^ (*(long long *) &src2[i]);
+ dst[i] = *(double *) &tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,d) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ double dst_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++) {
+ s1.a[i] = 132.45 * i;
+ s2.a[i] = 43.6 - i * 4.4;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_xor_pd) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_xor_pd) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_xor_pd) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN,d) (res1, dst_ref))
+ abort ();
+
+ MASK_MERGE (d) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,d) (res2, dst_ref))
+ abort ();
+
+ MASK_ZERO (d) (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,d) (res3, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vxorps-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-vxorps-1.c
new file mode 100644
index 00000000000..561785209d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vxorps-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vxorps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512 z;
+volatile __m256 y;
+volatile __m128 x;
+volatile __mmask16 m;
+volatile __mmask8 n;
+
+void extern
+avx512dq_test (void)
+{
+ z = _mm512_xor_ps (z, z);
+ z = _mm512_mask_xor_ps (z, m, z, z);
+ z = _mm512_maskz_xor_ps (m, z, z);
+
+ y = _mm256_mask_xor_ps (y, n, y, y);
+ y = _mm256_maskz_xor_ps (n, y, y);
+
+ x = _mm_mask_xor_ps (x, n, x, x);
+ x = _mm_maskz_xor_ps (n, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-vxorps-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-vxorps-2.c
new file mode 100644
index 00000000000..5360c04f12e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-vxorps-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -DAVX512DQ" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+void
+CALC (float *src1, float *src2, float *dst)
+{
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ int tmp = (*(int *) &src1[i]) ^ (*(int *) &src2[i]);
+ dst[i] = *(float *) &tmp;
+ }
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN,) s1, s2, res1, res2, res3;
+ MASK_TYPE mask = MASK_VALUE;
+ float dst_ref[SIZE];
+ int i;
+
+ for (i = 0; i < SIZE; i++) {
+ s1.a[i] = 132.45 * i;
+ s2.a[i] = 43.6 - i * 4.4;
+ res2.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_xor_ps) (s1.x, s2.x);
+ res2.x = INTRINSIC (_mask_xor_ps) (res2.x, mask, s1.x, s2.x);
+ res3.x = INTRINSIC (_maskz_xor_ps) (mask, s1.x, s2.x);
+
+ CALC (s1.a, s2.a, dst_ref);
+
+ if (UNION_CHECK (AVX512F_LEN,) (res1, dst_ref))
+ abort ();
+
+ MASK_MERGE () (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res2, dst_ref))
+ abort ();
+
+ MASK_ZERO () (dst_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN,) (res3, dst_ref))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-helper.h b/gcc/testsuite/gcc.target/i386/avx512f-helper.h
index 9beabdf5e43..04a1a89da51 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-helper.h
+++ b/gcc/testsuite/gcc.target/i386/avx512f-helper.h
@@ -1,16 +1,25 @@
/* This file is used to reduce a number of runtime tests for AVX512F
- instructions. Idea is to create one file per instruction -
+ and AVX512VL instructions. Idea is to create one file per instruction -
avx512f-insn-2.c - using defines from this file instead of intrinsic
name, vector length etc. Then dg-options are set with appropriate
-Dwhatever options in that .c file producing tests for specific
length. */
-#if defined (AVX512F)
+#ifndef AVX512F_HELPER_INCLUDED
+#define AVX512F_HELPER_INCLUDED
+
+#if defined (AVX512F) && !defined (AVX512VL)
#include "avx512f-check.h"
#elif defined (AVX512ER)
#include "avx512er-check.h"
#elif defined (AVX512CD)
#include "avx512cd-check.h"
+#elif defined (AVX512DQ)
+#include "avx512dq-check.h"
+#elif defined (AVX512BW)
+#include "avx512bw-check.h"
+#elif defined (AVX512VL)
+#include "avx512vl-check.h"
#endif
/* Macros expansion. */
@@ -73,8 +82,7 @@ MAKE_MASK_ZERO(i_uq, unsigned long long)
#define MASK_ZERO(TYPE) zero_masking_##TYPE
-/* Intrinsic being tested. */
-#define INTRINSIC(NAME) EVAL(_mm, AVX512F_LEN, NAME)
+
/* Unions used for testing (for example union512d, union256d etc.). */
#define UNION_TYPE(SIZE, NAME) EVAL(union, SIZE, NAME)
/* Corresponding union check. */
@@ -89,12 +97,17 @@ MAKE_MASK_ZERO(i_uq, unsigned long long)
/* Function which calculates result. */
#define CALC EVAL(calc_, AVX512F_LEN,)
+#ifndef AVX512VL
#define AVX512F_LEN 512
#define AVX512F_LEN_HALF 256
+void test_512 ();
+#endif
void test_512 ();
+void test_256 ();
+void test_128 ();
-#if defined (AVX512F)
+#if defined (AVX512F) && !defined (AVX512VL)
void
avx512f_test (void) { test_512 (); }
#elif defined (AVX512CD)
@@ -103,4 +116,28 @@ avx512cd_test (void) { test_512 (); }
#elif defined (AVX512ER)
void
avx512er_test (void) { test_512 (); }
+#elif defined (AVX512DQ)
+void
+avx512dq_test (void) { test_512 (); }
+#elif defined (AVX512BW)
+void
+avx512bw_test (void) { test_512 (); }
+#elif defined (AVX512VL)
+void
+avx512vl_test (void) { test_256 (); test_128 (); }
+#endif
+
+#endif /* AVX512F_HELPER_INCLUDED */
+
+/* Intrinsic being tested.  It has different definitions,
+   depending on AVX512F_LEN, so it's outside include guards
+   and is undefed away to silence warnings.  */
+#if defined INTRINSIC
+#undef INTRINSIC
+#endif
+
+#if AVX512F_LEN != 128
+#define INTRINSIC(NAME) EVAL(_mm, AVX512F_LEN, NAME)
+#else
+#define INTRINSIC(NAME) _mm ## NAME
#endif
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-mask-type.h b/gcc/testsuite/gcc.target/i386/avx512f-mask-type.h
index 2dacdd67a2a..efece198e51 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-mask-type.h
+++ b/gcc/testsuite/gcc.target/i386/avx512f-mask-type.h
@@ -1,10 +1,30 @@
/* Type of mask. */
#if SIZE <= 8
+#undef MASK_TYPE
+#undef MASK_VALUE
+#undef MASK_ALL_ONES
#define MASK_TYPE __mmask8
#define MASK_VALUE 0xB9
#define MASK_ALL_ONES 0xFF
#elif SIZE <= 16
+#undef MASK_TYPE
+#undef MASK_VALUE
+#undef MASK_ALL_ONES
#define MASK_TYPE __mmask16
#define MASK_VALUE 0xA6BA
#define MASK_ALL_ONES 0xFFFF
+#elif SIZE <= 32
+#undef MASK_TYPE
+#undef MASK_VALUE
+#undef MASK_ALL_ONES
+#define MASK_TYPE __mmask32
+#define MASK_VALUE 0xA6BAAB6A
+#define MASK_ALL_ONES 0xFFFFFFFFu
+#elif SIZE <= 64
+#undef MASK_TYPE
+#undef MASK_VALUE
+#undef MASK_ALL_ONES
+#define MASK_TYPE __mmask64
+#define MASK_VALUE 0xA6BAA6BAB6AB6ABB
+#define MASK_ALL_ONES 0xFFFFFFFFFFFFFFFFull
#endif
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
index a81f36256cd..52e226d9f15 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-2.c
@@ -26,6 +26,38 @@
if ((dst_ref & mask) != dst2) abort();
#endif
+#if AVX512F_LEN == 256
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 4; i++) \
+ { \
+ dst_ref = (((int) rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm256_loadu_pd(s1); \
+ source2.x = _mm256_loadu_pd(s2); \
+ dst1 = _mm256_cmp_pd_mask(source1.x, source2.x, imm);\
+ dst2 = _mm256_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((dst_ref & mask) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 128
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 2; i++) \
+ { \
+ dst_ref = (((int) rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm_loadu_pd(s1); \
+ source2.x = _mm_loadu_pd(s2); \
+ dst1 = _mm_cmp_pd_mask(source1.x, source2.x, imm);\
+ dst2 = _mm_mask_cmp_pd_mask(mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((dst_ref & mask) != dst2) abort();
+#endif
+
void
TEST ()
{
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
index 88dc8908596..2ffa2ed16b7 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-2.c
@@ -12,6 +12,7 @@
#include "avx512f-mask-type.h"
#if AVX512F_LEN == 512
+#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
for (i = 0; i < 16; i++) \
@@ -26,6 +27,38 @@
if ((dst_ref & mask) != dst2) abort();
#endif
+#if AVX512F_LEN == 256
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 8; i++) \
+ { \
+ dst_ref = (((int) rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm256_loadu_ps(s1); \
+ source2.x = _mm256_loadu_ps(s2); \
+ dst1 = _mm256_cmp_ps_mask(source1.x, source2.x, imm);\
+ dst2 = _mm256_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((dst_ref & mask) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 128
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 4; i++) \
+ { \
+ dst_ref = (((int) rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm_loadu_ps(s1); \
+ source2.x = _mm_loadu_ps(s2); \
+ dst1 = _mm_cmp_ps_mask(source1.x, source2.x, imm);\
+ dst2 = _mm_mask_cmp_ps_mask(mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((dst_ref & mask) != dst2) abort();
+#endif
+
void
TEST ()
{
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-2.c
index e79e014e61d..dde7c12c8ad 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtpd2ps-2.c
@@ -15,6 +15,8 @@ CALC (float *e, UNION_TYPE (AVX512F_LEN, d) s1)
int i;
for (i = 0; i < SIZE; i++)
e[i] = (float) s1.a[i];
+ for (i = SIZE; i < AVX512F_LEN_HALF / 32; i++)
+ e[i] = 0.0;
}
void
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-2.c
index ec98f4b3ad5..5cf110f92ac 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtph2ps-2.c
@@ -67,9 +67,9 @@ TEST (void)
val.a[15] = 0xc800;
#endif
- res1.x = _mm512_cvtph_ps (val.x);
- res2.x = _mm512_mask_cvtph_ps (res2.x, mask, val.x);
- res3.x = _mm512_maskz_cvtph_ps (mask, val.x);
+ res1.x = INTRINSIC (_cvtph_ps) (val.x);
+ res2.x = INTRINSIC (_mask_cvtph_ps) (res2.x, mask, val.x);
+ res3.x = INTRINSIC (_maskz_cvtph_ps) (mask, val.x);
if (UNION_CHECK (AVX512F_LEN,) (res1, exp))
abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-2.c
index 540bf29a4f9..e4f21a6e4b1 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-2.c
@@ -15,7 +15,11 @@ TEST (void)
UNION_TYPE (AVX512F_LEN,) val;
UNION_TYPE (AVX512F_LEN_HALF, i_w) res1,res2,res3;
MASK_TYPE mask = MASK_VALUE;
+#if AVX512F_LEN == 128
+ short exp[SIZE * 2];
+#else
short exp[SIZE];
+#endif
int i;
for (i = 0; i < SIZE; i++)
@@ -55,6 +59,11 @@ TEST (void)
exp[5] = 0xc000;
exp[6] = 0xc400;
exp[7] = 0xc800;
+#else
+ exp[4] = 0;
+ exp[5] = 0;
+ exp[6] = 0;
+ exp[7] = 0;
#endif
#if AVX512F_LEN > 256
exp[8] = 0x3c00;
@@ -67,9 +76,9 @@ TEST (void)
exp[15] = 0xc800;
#endif
- res1.x = _mm512_cvtps_ph (val.x, 0);
- res2.x = _mm512_mask_cvtps_ph (res2.x, mask, val.x, 0);
- res3.x = _mm512_maskz_cvtps_ph (mask, val.x, 0);
+  res1.x = INTRINSIC (_cvtps_ph) (val.x, 0);
+  res2.x = INTRINSIC (_mask_cvtps_ph) (res2.x, mask, val.x, 0);
+  res3.x = INTRINSIC (_maskz_cvtps_ph) (mask, val.x, 0);
if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res1, exp))
abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandd-2.c
index b5654d2fe8c..55786bc6289 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpandd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandd-2.c
@@ -34,18 +34,22 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE;
}
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_and_si512) (s1.x, s2.x);
res2.x = INTRINSIC (_and_epi32) (s1.x, s2.x);
+#endif
res3.x = INTRINSIC (_mask_and_epi32) (res3.x, mask, s1.x, s2.x);
res4.x = INTRINSIC (_maskz_and_epi32) (mask, s1.x, s2.x);
CALC (s1.a, s2.a, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
+#endif
MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandnd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandnd-2.c
index 418b96e6edf..b5c071944db 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpandnd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandnd-2.c
@@ -34,18 +34,22 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE;
}
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_andnot_si512) (s1.x, s2.x);
res2.x = INTRINSIC (_andnot_epi32) (s1.x, s2.x);
+#endif
res3.x = INTRINSIC (_mask_andnot_epi32) (res3.x, mask, s1.x, s2.x);
res4.x = INTRINSIC (_maskz_andnot_epi32) (mask, s1.x, s2.x);
CALC (s1.a, s2.a, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
+#endif
MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandnq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandnq-2.c
index 585d2885991..9b7512bcdf8 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpandnq-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandnq-2.c
@@ -34,14 +34,18 @@ TEST (void)
res2.a[i] = DEFAULT_VALUE;
}
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_andnot_epi64) (s1.x, s2.x);
+#endif
res2.x = INTRINSIC (_mask_andnot_epi64) (res2.x, mask, s1.x, s2.x);
res3.x = INTRINSIC (_maskz_andnot_epi64) (mask, s1.x, s2.x);
CALC (s1.a, s2.a, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort ();
+#endif
MASK_MERGE (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpandq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpandq-2.c
index b2f39456449..3493830d504 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpandq-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpandq-2.c
@@ -34,14 +34,18 @@ TEST (void)
res2.a[i] = DEFAULT_VALUE;
}
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_and_epi64) (s1.x, s2.x);
+#endif
res2.x = INTRINSIC (_mask_and_epi64) (res2.x, mask, s1.x, s2.x);
res3.x = INTRINSIC (_maskz_and_epi64) (mask, s1.x, s2.x);
CALC (s1.a, s2.a, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort ();
+#endif
MASK_MERGE (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-2.c
index cbd9d7b85d4..9cfa63d98b1 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpbroadcastq-2.c
@@ -54,7 +54,9 @@ TEST (void)
if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
abort ();
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_set1_epi64) (src.a[0]);
+#endif
res2.x = INTRINSIC (_mask_set1_epi64) (res2.x, mask, src.a[0]);
res3.x = INTRINSIC (_maskz_set1_epi64) (mask, src.a[0]);
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c
index 3a2dccfd4bd..9479f896af3 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpd-2.c
@@ -11,6 +11,7 @@
#include "avx512f-mask-type.h"
#if AVX512F_LEN == 512
+#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
for (i = 0; i < 16; i++) \
@@ -25,6 +26,38 @@
if ((mask & dst_ref) != dst2) abort();
#endif
+#if AVX512F_LEN == 256
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 8; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
+ source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
+ dst1 = _mm256_cmp_epi32_mask (source1.x, source2.x, imm);\
+ dst2 = _mm256_mask_cmp_epi32_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 128
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 4; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm_loadu_si128 ((__m128i*)s1); \
+ source2.x = _mm_loadu_si128 ((__m128i*)s2); \
+ dst1 = _mm_cmp_epi32_mask (source1.x, source2.x, imm);\
+ dst2 = _mm_mask_cmp_epi32_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
void
TEST ()
{
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c
index 7c9b888b255..857b04e6999 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpq-2.c
@@ -12,6 +12,8 @@
__mmask8 dst_ref;
+#if AVX512F_LEN == 512
+#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
for (i = 0; i < 8; i++) \
@@ -24,6 +26,39 @@ __mmask8 dst_ref;
dst2 = _mm512_mask_cmp_epi64_mask (mask, source1.x, source2.x, imm);\
if (dst_ref != dst1) abort(); \
if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 256
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 4; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
+ source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
+ dst1 = _mm256_cmp_epi64_mask (source1.x, source2.x, imm);\
+ dst2 = _mm256_mask_cmp_epi64_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 128
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 2; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm_loadu_si128 ((__m128i*)s1); \
+ source2.x = _mm_loadu_si128 ((__m128i*)s2); \
+ dst1 = _mm_cmp_epi64_mask (source1.x, source2.x, imm);\
+ dst2 = _mm_mask_cmp_epi64_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
void
TEST ()
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c
index 7a1ed898271..3f09e7d3ea0 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpud-2.c
@@ -11,6 +11,7 @@
#include "avx512f-mask-type.h"
#if AVX512F_LEN == 512
+#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
for (i = 0; i < 16; i++) \
@@ -25,6 +26,38 @@
if ((mask & dst_ref) != dst2) abort();
#endif
+#if AVX512F_LEN == 256
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 8; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
+ source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
+ dst1 = _mm256_cmp_epu32_mask (source1.x, source2.x, imm);\
+ dst2 = _mm256_mask_cmp_epu32_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 128
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 4; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm_loadu_si128 ((__m128i*)s1); \
+ source2.x = _mm_loadu_si128 ((__m128i*)s2); \
+ dst1 = _mm_cmp_epu32_mask (source1.x, source2.x, imm);\
+ dst2 = _mm_mask_cmp_epu32_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
void
TEST ()
{
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c
index f8db6c643a9..11eb6ffebcd 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcmpuq-2.c
@@ -11,6 +11,7 @@
#include "avx512f-mask-type.h"
#if AVX512F_LEN == 512
+#undef CMP
#define CMP(imm, rel) \
dst_ref = 0; \
for (i = 0; i < 8; i++) \
@@ -25,6 +26,38 @@
if ((mask & dst_ref) != dst2) abort();
#endif
+#if AVX512F_LEN == 256
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 4; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
+ source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
+ dst1 = _mm256_cmp_epu64_mask (source1.x, source2.x, imm);\
+ dst2 = _mm256_mask_cmp_epu64_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
+#if AVX512F_LEN == 128
+#undef CMP
+#define CMP(imm, rel) \
+ dst_ref = 0; \
+ for (i = 0; i < 2; i++) \
+ { \
+ dst_ref = ((rel) << i) | dst_ref; \
+ } \
+ source1.x = _mm_loadu_si128 ((__m128i*)s1); \
+ source2.x = _mm_loadu_si128 ((__m128i*)s2); \
+ dst1 = _mm_cmp_epu64_mask (source1.x, source2.x, imm);\
+ dst2 = _mm_mask_cmp_epu64_mask (mask, source1.x, source2.x, imm);\
+ if (dst_ref != dst1) abort(); \
+ if ((mask & dst_ref) != dst2) abort();
+#endif
+
void
TEST ()
{
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
index 1b2ce756abb..dbd4544c39e 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
@@ -17,7 +17,11 @@ CALC (int *mask, int *src1, int *dst)
for (i = 0; i < SIZE; i++)
{
+#if AVX512F_LEN == 512
dst[i] = src1[mask[i] & 15];
+#else
+ dst[i] = src1[mask[i] & 7];
+#endif
}
}
@@ -37,14 +41,18 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE;
}
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_permutexvar_epi32) (src1.x, src2.x);
+#endif
res2.x = INTRINSIC (_maskz_permutexvar_epi32) (mask, src1.x, src2.x);
res3.x = INTRINSIC (_mask_permutexvar_epi32) (res3.x, mask, src1.x, src2.x);
CALC (src1.a, src2.a, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
+#endif
MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
index dd88cd46c0b..770d5623f5f 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-imm-2.c
@@ -40,14 +40,18 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE;
}
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_permutex_epi64) (src1.x, IMM_MASK);
+#endif
res2.x = INTRINSIC (_maskz_permutex_epi64) (mask, src1.x, IMM_MASK);
res3.x = INTRINSIC (_mask_permutex_epi64) (res3.x, mask, src1.x, IMM_MASK);
CALC (src1.a, IMM_MASK, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort ();
+#endif
MASK_ZERO (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
index b7c36a5f141..c596b1d9c40 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
@@ -17,7 +17,11 @@ CALC (long long *mask, long long *src1, long long *dst)
for (i = 0; i < SIZE; i++)
{
+#if AVX512F_LEN == 512
dst[i] = src1[mask[i] & 7];
+#else
+ dst[i] = src1[mask[i] & 3];
+#endif
}
}
@@ -37,14 +41,18 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE;
}
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_permutexvar_epi64) (src1.x, src2.x);
+#endif
res2.x = INTRINSIC (_maskz_permutexvar_epi64) (mask, src1.x, src2.x);
res3.x = INTRINSIC (_mask_permutexvar_epi64) (res3.x, mask, src1.x, src2.x);
CALC (src1.a, src2.a, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort ();
+#endif
MASK_ZERO (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-2.c
index 0e418f98ed2..80cb2c3cb2f 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdb-2.c
@@ -9,11 +9,12 @@
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
-static void
-CALC (char *r, int *s)
+void
+CALC (char *r, int *s, int mem)
{
int i;
- for (i = 0; i < 16; i++)
+ int len = mem ? SIZE : 16;
+ for (i = 0; i < len; i++)
{
r[i] = (i < SIZE) ? (char) s[i] : 0;
}
@@ -28,6 +29,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_d) src;
MASK_TYPE mask = MASK_VALUE;
char res_ref[16];
+ char res_ref2[16];
sign = -1;
for (i = 0; i < SIZE; i++)
@@ -38,12 +40,17 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < 16; i++)
+ {
+ res4[i] = DEFAULT_VALUE * 2;
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtepi32_epi8) (src.x);
res2.x = INTRINSIC (_mask_cvtepi32_epi8) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtepi32_epi8) (mask, src.x);
- INTRINSIC (_mask_cvtepi32_storeu_epi8) (res4, mask, src.x);
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (128, i_b) (res1, res_ref))
abort ();
@@ -52,10 +59,14 @@ TEST (void)
if (UNION_CHECK (128, i_b) (res2, res_ref))
abort ();
- if (checkVc (res4, res_ref, 16))
- abort ();
-
MASK_ZERO (i_b) (res_ref, mask, SIZE);
if (UNION_CHECK (128, i_b) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtepi32_storeu_epi8) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_b) (res_ref2, mask, SIZE);
+ if (checkVc (res4, res_ref2, 16))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-2.c
index 376c105c0b2..c0797fcda95 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovdw-2.c
@@ -10,11 +10,12 @@
#include "avx512f-mask-type.h"
#define SIZE_HALF (AVX512F_LEN_HALF / 16)
-void static
-CALC (short *r, int *s)
+void
+CALC (short *r, int *s, int mem)
{
int i;
- for (i = 0; i < SIZE_HALF; i++)
+ int len = mem ? SIZE : SIZE_HALF;
+ for (i = 0; i < len; i++)
{
r[i] = (i < SIZE) ? (short) s[i] : 0;
}
@@ -29,6 +30,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_d) src;
MASK_TYPE mask = MASK_VALUE;
short res_ref[SIZE_HALF];
+ short res_ref2[SIZE_HALF];
sign = -1;
for (i = 0; i < SIZE; i++)
@@ -39,12 +41,16 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < SIZE_HALF; i++)
+ {
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ res4[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtepi32_epi16) (src.x);
res2.x = INTRINSIC (_mask_cvtepi32_epi16) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtepi32_epi16) (mask, src.x);
- INTRINSIC (_mask_cvtepi32_storeu_epi16) (res4, mask, src.x);
-
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res1, res_ref))
abort ();
@@ -53,10 +59,14 @@ TEST (void)
if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res2, res_ref))
abort ();
- if (checkVs (res4, res_ref, SIZE_HALF))
- abort ();
-
MASK_ZERO (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtepi32_storeu_epi16) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_w) (res_ref2, mask, SIZE);
+ if (checkVs (res4, res_ref2, SIZE_HALF))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-2.c
index 892ad74e0d1..8a9b4adaade 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqb-2.c
@@ -14,7 +14,7 @@ CALC (char *r, long long *s, int mem)
{
int i;
/* Don't zero out upper half if destination is memory. */
- int len = mem ? 8 : 16;
+ int len = mem ? SIZE : 16;
for (i = 0; i < len; i++)
{
r[i] = (i < SIZE) ? (char) s[i] : 0;
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-2.c
index 57170faf8f8..f0e4e127483 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqd-2.c
@@ -10,11 +10,12 @@
#include "avx512f-mask-type.h"
#define SIZE_HALF (AVX512F_LEN_HALF / 32)
-void static
-CALC (int *r, long long *s)
+void
+CALC (int *r, long long *s, int mem)
{
int i;
- for (i = 0; i < SIZE_HALF; i++)
+ int len = mem ? SIZE : SIZE_HALF;
+ for (i = 0; i < len; i++)
{
r[i] = (i < SIZE) ? (int) s[i] : 0;
}
@@ -29,6 +30,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_q) src;
MASK_TYPE mask = MASK_VALUE;
int res_ref[SIZE_HALF];
+ int res_ref2[SIZE_HALF];
sign = -1;
for (i = 0; i < SIZE; i++)
@@ -39,13 +41,17 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < SIZE_HALF; i++)
+ {
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ res4[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtepi64_epi32) (src.x);
res2.x = INTRINSIC (_mask_cvtepi64_epi32) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtepi64_epi32) (mask, src.x);
- INTRINSIC (_mask_cvtepi64_storeu_epi32) (res4, mask, src.x);
-
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
abort ();
@@ -54,10 +60,14 @@ TEST (void)
if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
abort ();
- if (checkVi (res4, res_ref, SIZE_HALF))
- abort ();
-
MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtepi64_storeu_epi32) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
+ if (checkVi (res4, res_ref2, SIZE_HALF))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-2.c
index 27b816511da..c527a977948 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovqw-2.c
@@ -9,11 +9,12 @@
#define SIZE (AVX512F_LEN / 64)
#include "avx512f-mask-type.h"
-static void
-CALC (short *r, long long *s)
+void
+CALC (short *r, long long *s, int mem)
{
int i;
- for (i = 0; i < 8; i++)
+ int len = mem ? SIZE : 8;
+ for (i = 0; i < len; i++)
{
r[i] = (i < SIZE) ? (short) s[i] : 0;
}
@@ -28,6 +29,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_q) src;
MASK_TYPE mask = MASK_VALUE;
short res_ref[8];
+ short res_ref2[8];
sign = -1;
for (i = 0; i < SIZE; i++)
@@ -38,12 +40,17 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < 8; i++)
+ {
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ res4[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtepi64_epi16) (src.x);
res2.x = INTRINSIC (_mask_cvtepi64_epi16) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtepi64_epi16) (mask, src.x);
- INTRINSIC (_mask_cvtepi64_storeu_epi16) (res4, mask, src.x);
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (128, i_w) (res1, res_ref))
abort ();
@@ -52,10 +59,14 @@ TEST (void)
if (UNION_CHECK (128, i_w) (res2, res_ref))
abort ();
- if (checkVs (res4, res_ref, 8))
- abort ();
-
MASK_ZERO (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (128, i_w) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtepi64_storeu_epi16) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_w) (res_ref2, mask, SIZE);
+ if (checkVs (res4, res_ref2, 8))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-2.c
index 116c8b69e71..b22b8746895 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdb-2.c
@@ -10,11 +10,12 @@
#include "avx512f-mask-type.h"
#include <limits.h>
-static void
-CALC (char *r, int *s)
+void
+CALC (char *r, int *s, int mem)
{
int i;
- for (i = 0; i < 16; i++)
+ int len = mem ? SIZE : 16;
+ for (i = 0; i < len; i++)
{
if (s[i] < CHAR_MIN)
r[i] = CHAR_MIN;
@@ -35,6 +36,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_d) src;
MASK_TYPE mask = MASK_VALUE;
char res_ref[16];
+ char res_ref2[16];
sign = -1;
for (i = 0; i < SIZE; i++)
@@ -45,12 +47,17 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < 16; i++)
+ {
+ res4[i] = DEFAULT_VALUE * 2;
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtsepi32_epi8) (src.x);
res2.x = INTRINSIC (_mask_cvtsepi32_epi8) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtsepi32_epi8) (mask, src.x);
- INTRINSIC (_mask_cvtsepi32_storeu_epi8) (res4, mask, src.x);
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (128, i_b) (res1, res_ref))
abort ();
@@ -59,10 +66,14 @@ TEST (void)
if (UNION_CHECK (128, i_b) (res2, res_ref))
abort ();
- if (checkVc (res4, res_ref, 16))
- abort ();
-
MASK_ZERO (i_b) (res_ref, mask, SIZE);
if (UNION_CHECK (128, i_b) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtsepi32_storeu_epi8) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_b) (res_ref2, mask, SIZE);
+ if (checkVc (res4, res_ref2, 16))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-2.c
index e175cce1c48..217ba6eaa46 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsdw-2.c
@@ -11,11 +11,12 @@
#define SIZE_HALF (AVX512F_LEN_HALF / 16)
#include <limits.h>
-static void
-CALC (short *r, int *s)
+void
+CALC (short *r, int *s, int mem)
{
int i;
- for (i = 0; i < SIZE_HALF; i++)
+ int len = mem ? SIZE : SIZE_HALF;
+ for (i = 0; i < len; i++)
{
if (s[i] < SHRT_MIN)
r[i] = SHRT_MIN;
@@ -36,6 +37,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_d) src;
MASK_TYPE mask = MASK_VALUE;
short res_ref[SIZE_HALF];
+ short res_ref2[SIZE_HALF];
sign = -1;
for (i = 0; i < SIZE; i++)
@@ -46,12 +48,17 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < SIZE_HALF; i++)
+ {
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ res4[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtsepi32_epi16) (src.x);
res2.x = INTRINSIC (_mask_cvtsepi32_epi16) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtsepi32_epi16) (mask, src.x);
- INTRINSIC (_mask_cvtsepi32_storeu_epi16) (res4, mask, src.x);
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res1, res_ref))
abort ();
@@ -60,10 +67,14 @@ TEST (void)
if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res2, res_ref))
abort ();
- if (checkVs (res4, res_ref, SIZE_HALF))
- abort ();
-
MASK_ZERO (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_w) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtsepi32_storeu_epi16) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_w) (res_ref2, mask, SIZE);
+ if (checkVs (res4, res_ref2, SIZE_HALF))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-2.c
index babe6e22b99..09b509616b2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqb-2.c
@@ -14,7 +14,7 @@ void static
CALC (char *r, long long *s, int mem)
{
int i;
- int len = mem ? 8 : 16;
+ int len = mem ? SIZE : 16;
for (i = 0; i < len; i++)
{
if (s[i] < CHAR_MIN)
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-2.c
index 342f9cb76b5..96d6ff7001c 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqd-2.c
@@ -11,11 +11,12 @@
#define SIZE_HALF (AVX512F_LEN_HALF / 32)
#include <limits.h>
-static void
-CALC (int *r, long long *s)
+void
+CALC (int *r, long long *s, int mem)
{
int i;
- for (i = 0; i < SIZE_HALF; i++)
+ int len = mem ? SIZE : SIZE_HALF;
+ for (i = 0; i < len; i++)
{
if (s[i] < INT_MIN)
r[i] = INT_MIN;
@@ -36,6 +37,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_q) src;
MASK_TYPE mask = MASK_VALUE;
int res_ref[SIZE_HALF];
+ int res_ref2[SIZE_HALF];
sign = -1;
for (i = 0; i < SIZE; i++)
@@ -46,12 +48,17 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < SIZE_HALF; i++)
+ {
+ res4[i] = DEFAULT_VALUE * 2;
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtsepi64_epi32) (src.x);
res2.x = INTRINSIC (_mask_cvtsepi64_epi32) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtsepi64_epi32) (mask, src.x);
- INTRINSIC (_mask_cvtsepi64_storeu_epi32) (res4, mask, src.x);
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
abort ();
@@ -60,10 +67,14 @@ TEST (void)
if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
abort ();
- if (checkVi (res4, res_ref, SIZE_HALF))
- abort ();
-
MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtsepi64_storeu_epi32) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
+ if (checkVi (res4, res_ref2, SIZE_HALF))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-2.c
index bc1619a8308..a7b6b367439 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovsqw-2.c
@@ -10,11 +10,12 @@
#include "avx512f-mask-type.h"
#include <limits.h>
-void static
-CALC (short *r, long long *s)
+void
+CALC (short *r, long long *s, int mem)
{
int i;
- for (i = 0; i < 8; i++)
+ int len = mem ? SIZE : 8;
+ for (i = 0; i < len; i++)
{
if (s[i] < SHRT_MIN)
r[i] = SHRT_MIN;
@@ -35,6 +36,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_q) src;
MASK_TYPE mask = MASK_VALUE;
short res_ref[8];
+ short res_ref2[8];
sign = -1;
for (i = 0; i < SIZE; i++)
@@ -45,12 +47,17 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < 8; i++)
+ {
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ res4[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtsepi64_epi16) (src.x);
res2.x = INTRINSIC (_mask_cvtsepi64_epi16) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtsepi64_epi16) (mask, src.x);
- INTRINSIC (_mask_cvtsepi64_storeu_epi16) (res4, mask, src.x);
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (128, i_w) (res1, res_ref))
abort ();
@@ -59,10 +66,14 @@ TEST (void)
if (UNION_CHECK (128, i_w) (res2, res_ref))
abort ();
- if (checkVs (res4, res_ref, 8))
- abort ();
-
MASK_ZERO (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (128, i_w) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtsepi64_storeu_epi16) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_w) (res_ref2, mask, SIZE);
+ if (checkVs (res4, res_ref2, 8))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-2.c
index 8907a72f3ea..91eb4663872 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdb-2.c
@@ -10,11 +10,12 @@
#include "avx512f-mask-type.h"
#include <limits.h>
-static void
-CALC (unsigned char *r, unsigned int *s)
+void
+CALC (unsigned char *r, unsigned int *s, int mem)
{
int i;
- for (i = 0; i < 16; i++)
+ int len = mem ? SIZE : 16;
+ for (i = 0; i < len; i++)
{
r[i] = (s[i] > UCHAR_MAX) ? UCHAR_MAX : s[i];
r[i] = (i < SIZE) ? r[i] : 0;
@@ -30,6 +31,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_ud) src;
MASK_TYPE mask = MASK_VALUE;
unsigned char res_ref[16];
+ unsigned char res_ref2[16];
for (i = 0; i < SIZE; i++)
{
@@ -38,12 +40,16 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < 16; i++)
+ {
+ res4[i] = DEFAULT_VALUE * 2;
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtusepi32_epi8) (src.x);
res2.x = INTRINSIC (_mask_cvtusepi32_epi8) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtusepi32_epi8) (mask, src.x);
- INTRINSIC (_mask_cvtusepi32_storeu_epi8) (res4, mask, src.x);
-
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (128, i_ub) (res1, res_ref))
abort ();
@@ -52,10 +58,14 @@ TEST (void)
if (UNION_CHECK (128, i_ub) (res2, res_ref))
abort ();
- if (checkVuc (res4, res_ref, 16))
- abort ();
-
MASK_ZERO (i_ub) (res_ref, mask, SIZE);
if (UNION_CHECK (128, i_ub) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtusepi32_storeu_epi8) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_b) (res_ref2, mask, SIZE);
+ if (checkVc (res4, res_ref2, 16))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-2.c
index c2c2624b498..64f6b0ac3f4 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusdw-2.c
@@ -11,11 +11,12 @@
#define SIZE_HALF (AVX512F_LEN_HALF / 16)
#include <limits.h>
-static void
-CALC (unsigned short *r, unsigned int *s)
+void
+CALC (unsigned short *r, unsigned int *s, int mem)
{
int i;
- for (i = 0; i < SIZE_HALF; i++)
+ int len = mem ? SIZE : SIZE_HALF;
+ for (i = 0; i < len; i++)
{
r[i] = (s[i] > USHRT_MAX) ? USHRT_MAX : s[i];
r[i] = (i < SIZE) ? r[i] : 0;
@@ -31,6 +32,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_ud) src;
MASK_TYPE mask = MASK_VALUE;
unsigned short res_ref[SIZE_HALF];
+ unsigned short res_ref2[SIZE_HALF];
for (i = 0; i < SIZE; i++)
{
@@ -39,12 +41,17 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < SIZE_HALF; i++)
+ {
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ res4[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtusepi32_epi16) (src.x);
res2.x = INTRINSIC (_mask_cvtusepi32_epi16) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtusepi32_epi16) (mask, src.x);
- INTRINSIC (_mask_cvtusepi32_storeu_epi16) (res4, mask, src.x);
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (AVX512F_LEN_HALF, i_uw) (res1, res_ref))
abort ();
@@ -53,10 +60,14 @@ TEST (void)
if (UNION_CHECK (AVX512F_LEN_HALF, i_uw) (res2, res_ref))
abort ();
- if (checkVus (res4, res_ref, SIZE_HALF))
- abort ();
-
MASK_ZERO (i_uw) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_uw) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtusepi32_storeu_epi16) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_w) (res_ref2, mask, SIZE);
+ if (checkVs (res4, res_ref2, SIZE_HALF))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-2.c
index 8c20544524b..74ba3d4e26a 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqb-2.c
@@ -14,7 +14,7 @@ static void
CALC (unsigned char *r, unsigned long long *s, int mem)
{
int i;
- int len = mem ? 8 : 16;
+ int len = mem ? SIZE : 16;
for (i = 0; i < len; i++)
{
r[i] = (s[i] > UCHAR_MAX) ? UCHAR_MAX : s[i];
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-2.c
index f500ea3c285..f66e858c636 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqd-2.c
@@ -11,11 +11,12 @@
#define SIZE_HALF (AVX512F_LEN_HALF / 32)
#include <limits.h>
-static void
-CALC (unsigned int *r, unsigned long long *s)
+void
+CALC (unsigned int *r, unsigned long long *s, int mem)
{
int i;
- for (i = 0; i < SIZE_HALF; i++)
+ int len = mem ? SIZE : SIZE_HALF;
+ for (i = 0; i < len; i++)
{
r[i] = (s[i] > UINT_MAX) ? UINT_MAX : s[i];
r[i] = (i < SIZE) ? r[i] : 0;
@@ -31,6 +32,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_uq) src;
MASK_TYPE mask = MASK_VALUE;
unsigned int res_ref[SIZE_HALF];
+ unsigned int res_ref2[SIZE_HALF];
for (i = 0; i < SIZE; i++)
{
@@ -39,12 +41,17 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < SIZE_HALF; i++)
+ {
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ res4[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtusepi64_epi32) (src.x);
res2.x = INTRINSIC (_mask_cvtusepi64_epi32) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtusepi64_epi32) (mask, src.x);
- INTRINSIC (_mask_cvtusepi64_storeu_epi32) (res4, mask, src.x);
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res1, res_ref))
abort ();
@@ -53,10 +60,14 @@ TEST (void)
if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res2, res_ref))
abort ();
- if (checkVui (res4, res_ref, SIZE_HALF))
- abort ();
-
MASK_ZERO (i_ud) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res3, res_ref))
abort ();
+
+ CALC (res_ref2, src.a, 1);
+ INTRINSIC (_mask_cvtusepi64_storeu_epi32) (res4, mask, src.x);
+
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
+ if (checkVi (res4, res_ref2, SIZE_HALF))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-2.c
index c1ce9ed65bc..7aeda113ac6 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpmovusqw-2.c
@@ -10,11 +10,12 @@
#include "avx512f-mask-type.h"
#include <limits.h>
-static void
-CALC (unsigned short *r, unsigned long long *s)
+void
+CALC (unsigned short *r, unsigned long long *s, int mem)
{
int i;
- for (i = 0; i < 8; i++)
+ int len = mem ? SIZE : 8;
+ for (i = 0; i < len; i++)
{
r[i] = (s[i] > USHRT_MAX) ? USHRT_MAX : s[i];
r[i] = (i < SIZE) ? r[i] : 0;
@@ -30,6 +31,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN, i_uq) src;
MASK_TYPE mask = MASK_VALUE;
unsigned short res_ref[8];
+ unsigned short res_ref2[8];
for (i = 0; i < SIZE; i++)
{
@@ -38,12 +40,17 @@ TEST (void)
res4[i] = DEFAULT_VALUE;
}
+ for (i = SIZE; i < 8; i++)
+ {
+ res_ref2[i] = DEFAULT_VALUE * 2;
+ res4[i] = DEFAULT_VALUE * 2;
+ }
+
res1.x = INTRINSIC (_cvtusepi64_epi16) (src.x);
res2.x = INTRINSIC (_mask_cvtusepi64_epi16) (res2.x, mask, src.x);
res3.x = INTRINSIC (_maskz_cvtusepi64_epi16) (mask, src.x);
- INTRINSIC (_mask_cvtusepi64_storeu_epi16) (res4, mask, src.x);
- CALC (res_ref, src.a);
+ CALC (res_ref, src.a, 0);
if (UNION_CHECK (128, i_uw) (res1, res_ref))
abort ();
@@ -52,10 +59,14 @@ TEST (void)
if (UNION_CHECK (128, i_uw) (res2, res_ref))
abort ();
- if (checkVus (res4, res_ref, 8))
- abort ();
-
MASK_ZERO (i_uw) (res_ref, mask, SIZE);
if (UNION_CHECK (128, i_uw) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtusepi64_storeu_epi16) (res4, mask, src.x);
+ CALC (res_ref2, src.a, 1);
+
+ MASK_MERGE (i_w) (res_ref2, mask, SIZE);
+ if (checkVs (res4, res_ref2, 8))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpord-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpord-2.c
index 5656a81e3fb..86dbfa7fef2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpord-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpord-2.c
@@ -34,18 +34,22 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE;
}
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_or_si512) (s1.x, s2.x);
res2.x = INTRINSIC (_or_epi32) (s1.x, s2.x);
+#endif
res3.x = INTRINSIC (_mask_or_epi32) (res3.x, mask, s1.x, s2.x);
res4.x = INTRINSIC (_maskz_or_epi32) (mask, s1.x, s2.x);
CALC (s1.a, s2.a, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
+#endif
MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vporq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vporq-2.c
index 9af7172e31c..86dc1abd18b 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vporq-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vporq-2.c
@@ -33,15 +33,18 @@ TEST (void)
sign = -sign;
res2.a[i] = DEFAULT_VALUE;
}
-
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_or_epi64) (s1.x, s2.x);
+#endif
res2.x = INTRINSIC (_mask_or_epi64) (res2.x, mask, s1.x, s2.x);
res3.x = INTRINSIC (_maskz_or_epi64) (mask, s1.x, s2.x);
CALC (s1.a, s2.a, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort ();
+#endif
MASK_MERGE (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpxord-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpxord-2.c
index fa431aed140..44ca56c6328 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpxord-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpxord-2.c
@@ -34,18 +34,22 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE;
}
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_xor_si512) (s1.x, s2.x);
res2.x = INTRINSIC (_xor_epi32) (s1.x, s2.x);
+#endif
res3.x = INTRINSIC (_mask_xor_epi32) (res3.x, mask, s1.x, s2.x);
res4.x = INTRINSIC (_maskz_xor_epi32) (mask, s1.x, s2.x);
CALC (s1.a, s2.a, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
+#endif
MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpxorq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpxorq-2.c
index ef605476bbc..c35d019089e 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpxorq-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpxorq-2.c
@@ -34,14 +34,18 @@ TEST (void)
res2.a[i] = DEFAULT_VALUE;
}
+#if AVX512F_LEN == 512
res1.x = INTRINSIC (_xor_epi64) (s1.x, s2.x);
+#endif
res2.x = INTRINSIC (_mask_xor_epi64) (res2.x, mask, s1.x, s2.x);
res3.x = INTRINSIC (_maskz_xor_epi64) (mask, s1.x, s2.x);
CALC (s1.a, s2.a, res_ref);
+#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
- abort ();
+ abort ();
+#endif
MASK_MERGE (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-2.c
index ca8edad8f40..25f385c31df 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrndscaleps-2.c
@@ -38,7 +38,7 @@ TEST (void)
UNION_TYPE (AVX512F_LEN,) res1, res2, res3, s;
float res_ref[SIZE];
- MASK_TYPE mask = 6 ^ (0xffff >> SIZE);
+ MASK_TYPE mask = MASK_VALUE;
imm = _MM_FROUND_FLOOR | (7 << 4);
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-2.c
index fa976260af4..d9b44b369e2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshuff32x4-2.c
@@ -38,7 +38,11 @@ TEST (void)
MASK_TYPE mask = MASK_VALUE;
float e[SIZE];
int i;
+#if AVX512F_LEN == 512
int imm = 203;
+#else
+ int imm = 1;
+#endif
for (i = 0; i < SIZE; i++)
{
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-2.c
index 9f5e093b39b..a4427fd4865 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshuff64x2-2.c
@@ -38,7 +38,11 @@ TEST (void)
MASK_TYPE mask = MASK_VALUE;
double e[SIZE];
int i;
+#if AVX512F_LEN == 512
int imm = 203;
+#else
+ int imm = 1;
+#endif
for (i = 0; i < SIZE; i++)
{
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-2.c
index b51eec7d97f..00641707b1c 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufi32x4-2.c
@@ -38,7 +38,11 @@ TEST (void)
MASK_TYPE mask = MASK_VALUE;
int e[SIZE];
int i;
+#if AVX512F_LEN == 512
int imm = 203;
+#else
+ int imm = 1;
+#endif
for (i = 0; i < SIZE; i++)
{
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-2.c
index 5428eaeeb60..f35a0418f5b 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufi64x2-2.c
@@ -38,7 +38,11 @@ TEST (void)
MASK_TYPE mask = MASK_VALUE;
long long e[SIZE];
int i;
+#if AVX512F_LEN == 512
int imm = 203;
+#else
+ int imm = 1;
+#endif
for (i = 0; i < SIZE; i++)
{
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-check.h b/gcc/testsuite/gcc.target/i386/avx512vl-check.h
new file mode 100644
index 00000000000..c017ee28e47
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-check.h
@@ -0,0 +1,49 @@
+#include <stdlib.h>
+#include "cpuid.h"
+#include "m512-check.h"
+#include "avx512f-os-support.h"
+
+static void avx512vl_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{
+ avx512vl_test ();
+}
+
+int
+main ()
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return 0;
+
+ /* Run avx512vl test only if host has avx512vl support. */
+ if ((ecx & bit_OSXSAVE) == (bit_OSXSAVE))
+ {
+ if (__get_cpuid_max (0, NULL) < 7)
+ return 0;
+
+ __cpuid_count (7, 0, eax, ebx, ecx, edx);
+
+ if ((avx512f_os_support ())
+ && ((ebx & bit_AVX512F) == bit_AVX512F)
+ && ((ebx & bit_AVX512VL) == bit_AVX512VL))
+ {
+ do_test ();
+#ifdef DEBUG
+ printf ("PASSED\n");
+#endif
+ return 0;
+ }
+#ifdef DEBUG
+ printf ("SKIPPED\n");
+#endif
+ }
+#ifdef DEBUG
+ else
+ printf ("SKIPPED\n");
+#endif
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-gather-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-gather-1.c
new file mode 100644
index 00000000000..cb700575912
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-gather-1.c
@@ -0,0 +1,217 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-O3 -mavx512vl" } */
+
+#include "avx512vl-check.h"
+
+#define N 12
+float vf1[N+4], vf2[N];
+double vd1[N+4], vd2[N];
+int vi1[N+4], vi2[N], k[N];
+long long vl1[N+4], vl2[N];
+long l[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[i] = vf1[k[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[i] = vi1[k[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[i] = vf1[k[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f4 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[i] = vi1[k[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[i] = vd1[k[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[i] = vl1[k[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f7 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[i] = vd1[k[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f8 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[i] = vl1[k[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f9 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[i] = vf1[l[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f10 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[i] = vi1[l[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f11 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vf2[i] = vf1[l[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f12 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vi2[i] = vi1[l[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f13 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[i] = vd1[l[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f14 (void)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[i] = vl1[l[i]];
+}
+
+__attribute__((noinline, noclone)) void
+f15 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vd2[i] = vd1[l[i] + x];
+}
+
+__attribute__((noinline, noclone)) void
+f16 (int x)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ vl2[i] = vl1[l[i] + x];
+}
+
+static void
+avx512vl_test (void)
+{
+ int i;
+
+ for (i = 0; i < N + 4; i++)
+ {
+ asm ("");
+ vf1[i] = 17.0f + i;
+ vd1[i] = 19.0 + i;
+ vi1[i] = 21 + i;
+ vl1[i] = 23L + i;
+ }
+ for (i = 0; i < N; i++)
+ {
+ asm ("");
+ k[i] = (i * 731) & (N - 1);
+ l[i] = (i * 657) & (N - 1);
+ }
+
+ f1 ();
+ f2 ();
+ for (i = 0; i < N; i++)
+ if (vf2[i] != ((i * 731) & (N - 1)) + 17
+ || vi2[i] != ((i * 731) & (N - 1)) + 21)
+ abort ();
+
+ f3 (1);
+ f4 (2);
+ for (i = 0; i < N; i++)
+ if (vf2[i] != ((i * 731) & (N - 1)) + 17 + 1
+ || vi2[i] != ((i * 731) & (N - 1)) + 21 + 2)
+ abort ();
+
+ f5 ();
+ f6 ();
+ for (i = 0; i < N; i++)
+ if (vd2[i] != ((i * 731) & (N - 1)) + 19
+ || vl2[i] != ((i * 731) & (N - 1)) + 23)
+ abort ();
+
+ f7 (3);
+ f8 (2);
+ for (i = 0; i < N; i++)
+ if (vd2[i] != ((i * 731) & (N - 1)) + 19 + 3
+ || vl2[i] != ((i * 731) & (N - 1)) + 23 + 2)
+ abort ();
+
+ f9 ();
+ f10 ();
+ for (i = 0; i < N; i++)
+ if (vf2[i] != ((i * 657) & (N - 1)) + 17
+ || vi2[i] != ((i * 657) & (N - 1)) + 21)
+ abort ();
+
+ f11 (4);
+ f12 (1);
+ for (i = 0; i < N; i++)
+ if (vf2[i] != ((i * 657) & (N - 1)) + 17 + 4
+ || vi2[i] != ((i * 657) & (N - 1)) + 21 + 1)
+ abort ();
+
+ f13 ();
+ f14 ();
+ for (i = 0; i < N; i++)
+ if (vd2[i] != ((i * 657) & (N - 1)) + 19
+ || vl2[i] != ((i * 657) & (N - 1)) + 23)
+ abort ();
+
+ f15 (2);
+ f16 (4);
+ for (i = 0; i < N; i++)
+ if (vd2[i] != ((i * 657) & (N - 1)) + 19 + 2
+ || vl2[i] != ((i * 657) & (N - 1)) + 23 + 4)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherd-1.c
new file mode 100644
index 00000000000..880f5eff35b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherd-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpgatherdd\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vpgatherdd\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x1, idx1;
+volatile __m128i x2, idx2;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mmask_i32gather_epi32 (x1, 0xFF, idx1, base, 8);
+ x1 = _mm256_mmask_i32gather_epi32 (x1, m8, idx1, base, 8);
+ x2 = _mm_mmask_i32gather_epi32 (x2, 0xFF, idx2, base, 8);
+ x2 = _mm_mmask_i32gather_epi32 (x2, m8, idx2, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherpd-1.c
new file mode 100644
index 00000000000..48e7f217119
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherpd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vgatherdpd\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vgatherdpd\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x1;
+volatile __m128d x2;
+volatile __m128i idx;
+volatile __mmask8 m8;
+double *base;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mmask_i32gather_pd (x1, 0xFF, idx, base, 8);
+ x1 = _mm256_mmask_i32gather_pd (x1, m8, idx, base, 8);
+ x2 = _mm_mmask_i32gather_pd (x2, 0xFF, idx, base, 8);
+ x2 = _mm_mmask_i32gather_pd (x2, m8, idx, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherps-1.c
new file mode 100644
index 00000000000..fcd0a583287
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherps-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vgatherdps\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vgatherdps\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __m256i idx1;
+volatile __m128i idx2;
+volatile __mmask8 m8;
+float *base;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mmask_i32gather_ps (x1, 0xFF, idx1, base, 8);
+ x1 = _mm256_mmask_i32gather_ps (x1, m8, idx1, base, 8);
+ x2 = _mm_mmask_i32gather_ps (x2, 0xFF, idx2, base, 8);
+ x2 = _mm_mmask_i32gather_ps (x2, m8, idx2, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherq-1.c
new file mode 100644
index 00000000000..771909cc7e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i32gatherq-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpgatherdq\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vpgatherdq\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x1;
+volatile __m128i x2, idx;
+volatile __mmask8 m8;
+long long *base;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mmask_i32gather_epi64 (x1, 0xFF, idx, base, 8);
+ x1 = _mm256_mmask_i32gather_epi64 (x1, m8, idx, base, 8);
+ x2 = _mm_mmask_i32gather_epi64 (x2, 0xFF, idx, base, 8);
+ x2 = _mm_mmask_i32gather_epi64 (x2, m8, idx, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterd-1.c
new file mode 100644
index 00000000000..761c7698751
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdd\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256i src1, idx1;
+volatile __m128i src2, idx2;
+volatile __mmask8 m8;
+int *addr;
+
+void extern
+avx512vl_test (void)
+{
+ _mm256_i32scatter_epi32 (addr, idx1, src1, 8);
+ _mm256_mask_i32scatter_epi32 (addr, m8, idx1, src1, 8);
+
+ _mm_i32scatter_epi32 (addr, idx2, src2, 8);
+ _mm_mask_i32scatter_epi32 (addr, m8, idx2, src2, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterpd-1.c
new file mode 100644
index 00000000000..914ad8d591b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterpd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdpd\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256d src1;
+volatile __m128d src2;
+volatile __m128i idx;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512vl_test (void)
+{
+ _mm256_i32scatter_pd (addr, idx, src1, 8);
+ _mm256_mask_i32scatter_pd (addr, m8, idx, src1, 8);
+
+ _mm_i32scatter_pd (addr, idx, src2, 8);
+ _mm_mask_i32scatter_pd (addr, m8, idx, src2, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterps-1.c
new file mode 100644
index 00000000000..72de9dd7f32
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterps-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterdps\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256 src1;
+volatile __m128 src2;
+volatile __m256i idx1;
+volatile __m128i idx2;
+volatile __mmask8 m8;
+float *addr;
+
+void extern
+avx512vl_test (void)
+{
+ _mm256_i32scatter_ps (addr, idx1, src1, 8);
+ _mm256_mask_i32scatter_ps (addr, m8, idx1, src1, 8);
+
+ _mm_i32scatter_ps (addr, idx2, src2, 8);
+ _mm_mask_i32scatter_ps (addr, m8, idx2, src2, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterq-1.c
new file mode 100644
index 00000000000..b1eb8eae90e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i32scatterq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterdq\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256i src1;
+volatile __m128i src2, idx;
+volatile __mmask8 m8;
+long long *addr;
+
+void extern
+avx512vl_test (void)
+{
+ _mm256_i32scatter_epi64 (addr, idx, src1, 8);
+ _mm256_mask_i32scatter_epi64 (addr, m8, idx, src1, 8);
+
+ _mm_i32scatter_epi64 (addr, idx, src2, 8);
+ _mm_mask_i32scatter_epi64 (addr, m8, idx, src2, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherd-1.c
new file mode 100644
index 00000000000..a11520c2948
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherd-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpgatherqd\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vpgatherqd\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx1;
+volatile __m128i idx2, x;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mmask_i64gather_epi32 (x, 0xFF, idx1, base, 8);
+ x = _mm256_mmask_i64gather_epi32 (x, m8, idx1, base, 8);
+ x = _mm_mmask_i64gather_epi32 (x, 0xFF, idx2, base, 8);
+ x = _mm_mmask_i64gather_epi32 (x, m8, idx2, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherpd-1.c
new file mode 100644
index 00000000000..1b1b2cf6843
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherpd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vgatherqpd\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vgatherqpd\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x1;
+volatile __m128d x2;
+volatile __m256i idx1;
+volatile __m128i idx2;
+volatile __mmask8 m8;
+double *base;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mmask_i64gather_pd (x1, 0xFF, idx1, base, 8);
+ x1 = _mm256_mmask_i64gather_pd (x1, m8, idx1, base, 8);
+ x2 = _mm_mmask_i64gather_pd (x2, 0xFF, idx2, base, 8);
+ x2 = _mm_mmask_i64gather_pd (x2, m8, idx2, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherps-1.c
new file mode 100644
index 00000000000..0b8f6fec181
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherps-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vgatherqps\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*xmm\[0-9\]{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vgatherqps\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x;
+volatile __m256i idx1;
+volatile __m128i idx2;
+volatile __mmask8 m8;
+float *base;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mmask_i64gather_ps (x, 0xFF, idx1, base, 8);
+ x = _mm256_mmask_i64gather_ps (x, m8, idx1, base, 8);
+ x = _mm_mmask_i64gather_ps (x, 0xFF, idx2, base, 8);
+ x = _mm_mmask_i64gather_ps (x, m8, idx2, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherq-1.c
new file mode 100644
index 00000000000..ccdb416ffed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i64gatherq-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpgatherqq\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]{%k\[1-7\]}" 2} } */
+/* { dg-final { scan-assembler-times "vpgatherqq\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x1, idx1;
+volatile __m128i x2, idx2;
+volatile __mmask8 m8;
+long long *base;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mmask_i64gather_epi64 (x1, 0xFF, idx1, base, 8);
+ x1 = _mm256_mmask_i64gather_epi64 (x1, m8, idx1, base, 8);
+ x2 = _mm_mmask_i64gather_epi64 (x2, 0xFF, idx2, base, 8);
+ x2 = _mm_mmask_i64gather_epi64 (x2, m8, idx2, base, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterd-1.c
new file mode 100644
index 00000000000..8a124eced24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqd\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx1;
+volatile __m128i idx2, src;
+volatile __mmask8 m8;
+int *addr;
+
+void extern
+avx512vl_test (void)
+{
+ _mm256_i64scatter_epi32 (addr, idx1, src, 8);
+ _mm256_mask_i64scatter_epi32 (addr, m8, idx1, src, 8);
+
+ _mm_i64scatter_epi32 (addr, idx2, src, 8);
+ _mm_mask_i64scatter_epi32 (addr, m8, idx2, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterpd-1.c
new file mode 100644
index 00000000000..20e9babb8bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterpd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqpd\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256d src1;
+volatile __m128d src2;
+volatile __m256i idx1;
+volatile __m128i idx2;
+volatile __mmask8 m8;
+double *addr;
+
+void extern
+avx512vl_test (void)
+{
+ _mm256_i64scatter_pd (addr, idx1, src1, 8);
+ _mm256_mask_i64scatter_pd (addr, m8, idx1, src1, 8);
+
+ _mm_i64scatter_pd (addr, idx2, src2, 8);
+ _mm_mask_i64scatter_pd (addr, m8, idx2, src2, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterps-1.c
new file mode 100644
index 00000000000..53bac89e709
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterps-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterqps\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m128 src;
+volatile __m256i idx1;
+volatile __m128i idx2;
+volatile __mmask8 m8;
+float *addr;
+
+void extern
+avx512vl_test (void)
+{
+ _mm256_i64scatter_ps (addr, idx1, src, 8);
+ _mm256_mask_i64scatter_ps (addr, m8, idx1, src, 8);
+
+ _mm_i64scatter_ps (addr, idx2, src, 8);
+ _mm_mask_i64scatter_ps (addr, m8, idx2, src, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterq-1.c
new file mode 100644
index 00000000000..4235835857a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-i64scatterq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\n\]*ymm\[0-9\]\[^\n\]*ymm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+/* { dg-final { scan-assembler-times "vpscatterqq\[ \\t\]+\[^\n\]*xmm\[0-9\]\[^\n\]*xmm\[0-9\]\[^\n\]*{%k\[1-7\]}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256i src1, idx1;
+volatile __m128i src2, idx2;
+volatile __mmask8 m8;
+long long *addr;
+
+void extern
+avx512vl_test (void)
+{
+ _mm256_i64scatter_epi64 (addr, idx1, src1, 8);
+ _mm256_mask_i64scatter_epi64 (addr, m8, idx1, src1, 8);
+
+ _mm_i64scatter_epi64 (addr, idx2, src2, 8);
+ _mm_mask_i64scatter_epi64 (addr, m8, idx2, src2, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vaddpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vaddpd-1.c
new file mode 100644
index 00000000000..f28ff356061
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vaddpd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vaddpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vaddpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x128;
+volatile __m256d x256;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x128 = _mm_mask_add_pd (x128, m, x128, x128);
+ x128 = _mm_maskz_add_pd (m, x128, x128);
+
+ x256 = _mm256_mask_add_pd (x256, m, x256, x256);
+ x256 = _mm256_maskz_add_pd (m, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vaddpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vaddpd-2.c
new file mode 100644
index 00000000000..e9fd14be84a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vaddpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vaddpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vaddpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vaddps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vaddps-1.c
new file mode 100644
index 00000000000..8c9f23e1e85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vaddps-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vaddps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x128;
+volatile __m256 x256;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x128 = _mm_mask_add_ps (x128, m, x128, x128);
+ x128 = _mm_maskz_add_ps (m, x128, x128);
+
+ x256 = _mm256_mask_add_ps (x256, m, x256, x256);
+ x256 = _mm256_maskz_add_ps (m, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vaddps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vaddps-2.c
new file mode 100644
index 00000000000..0c2b7f18d6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vaddps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vaddps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vaddps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-valignd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-valignd-1.c
new file mode 100644
index 00000000000..0b48abe180c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-valignd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "valignd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "valignd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "valignd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "valignd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "valignd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "valignd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_alignr_epi32 (y, y, 3);
+ y = _mm256_mask_alignr_epi32 (y, m, y, y, 3);
+ y = _mm256_maskz_alignr_epi32 (m, y, y, 3);
+
+ x = _mm_alignr_epi32 (x, x, 3);
+ x = _mm_mask_alignr_epi32 (x, m, x, x, 3);
+ x = _mm_maskz_alignr_epi32 (m, x, x, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-valignd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-valignd-2.c
new file mode 100644
index 00000000000..ef7c67830f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-valignd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-valignd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-valignd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1.c
new file mode 100644
index 00000000000..cd0b0787b0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "valignq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_alignr_epi64 (y, y, 3);
+ y = _mm256_mask_alignr_epi64 (y, m, y, y, 3);
+ y = _mm256_maskz_alignr_epi64 (m, y, y, 3);
+
+ x = _mm_alignr_epi64 (x, x, 3);
+ x = _mm_mask_alignr_epi64 (x, m, x, x, 3);
+ x = _mm_maskz_alignr_epi64 (m, x, x, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-valignq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-2.c
new file mode 100644
index 00000000000..b59cbf0bc5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-valignq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-valignq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-valignq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vandnpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vandnpd-2.c
new file mode 100644
index 00000000000..72a4ccae28c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vandnpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vandnpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vandnpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vandnps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vandnps-2.c
new file mode 100644
index 00000000000..77950d0e251
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vandnps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vandnps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vandnps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vandpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vandpd-2.c
new file mode 100644
index 00000000000..44716b328f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vandpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vandpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vandpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vandps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vandps-2.c
new file mode 100644
index 00000000000..ec8ee9e719b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vandps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vandps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vandps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vblendmpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vblendmpd-1.c
new file mode 100644
index 00000000000..ed8b670a20d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vblendmpd-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "(vblendmpd|vmovapd)\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "(vblendmpd|vmovapd)\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m256d x;
+volatile __m128d xx;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_blend_pd (m, x, x);
+ xx = _mm_mask_blend_pd (m, xx, xx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vblendmpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vblendmpd-2.c
new file mode 100644
index 00000000000..56b80b11487
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vblendmpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vblendmpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vblendmpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vblendmps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vblendmps-1.c
new file mode 100644
index 00000000000..a43d4773651
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vblendmps-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "(vblendmps|vmovaps)\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "(vblendmps|vmovaps)\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m256 x;
+volatile __m128 xx;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_blend_ps (m, x, x);
+ xx = _mm_mask_blend_ps (m, xx, xx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vblendmps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vblendmps-2.c
new file mode 100644
index 00000000000..16a40f8d68c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vblendmps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vblendmps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vblendmps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x2-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x2-2.c
new file mode 100644
index 00000000000..7afe2015040
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x2-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vbroadcastf32x2-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x4-1.c
new file mode 100644
index 00000000000..135975f77ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x4-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]|vshuff32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshuff32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastf32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}|vshuff32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x;
+volatile __m128 y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_broadcast_f32x4 (y);
+ x = _mm256_mask_broadcast_f32x4 (x, m, y);
+ x = _mm256_maskz_broadcast_f32x4 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x4-2.c
new file mode 100644
index 00000000000..b5bc7e2c739
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf32x4-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vbroadcastf32x4-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf64x2-2.c
new file mode 100644
index 00000000000..df9893779d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastf64x2-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vbroadcastf64x2-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x2-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x2-2.c
new file mode 100644
index 00000000000..90469664a2c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x2-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vbroadcasti32x2-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vbroadcasti32x2-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x4-1.c
new file mode 100644
index 00000000000..ae6945d1975
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x4-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]|vshufi32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vshufi32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcasti32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}|vshufi32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_broadcast_i32x4 (y);
+ x = _mm256_mask_broadcast_i32x4 (x, m, y);
+ x = _mm256_maskz_broadcast_i32x4 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x4-2.c
new file mode 100644
index 00000000000..feca66049c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti32x4-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512dq -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vbroadcasti32x4-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti64x2-2.c
new file mode 100644
index 00000000000..eded7cb8046
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcasti64x2-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vbroadcasti64x2-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastsd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastsd-1.c
new file mode 100644
index 00000000000..08cd026e8d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastsd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastsd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastsd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastsd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x;
+volatile __m128d y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_broadcastsd_pd (y);
+ x = _mm256_mask_broadcastsd_pd (x, m, y);
+ x = _mm256_maskz_broadcastsd_pd (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastsd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastsd-2.c
new file mode 100644
index 00000000000..1655482c13d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastsd-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vbroadcastsd-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastss-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastss-1.c
new file mode 100644
index 00000000000..0cbb7bf51ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastss-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x;
+volatile __m128 y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_broadcastss_ps (y);
+ x = _mm256_mask_broadcastss_ps (x, m, y);
+ x = _mm256_maskz_broadcastss_ps (m, y);
+ y = _mm_broadcastss_ps (y);
+ y = _mm_mask_broadcastss_ps (y, m, y);
+ y = _mm_maskz_broadcastss_ps (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastss-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastss-2.c
new file mode 100644
index 00000000000..e6dae6ff9ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vbroadcastss-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vbroadcastss-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vbroadcastss-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcmppd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcmppd-1.c
new file mode 100644
index 00000000000..3195e930432
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcmppd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler "vcmppd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmppd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vcmppd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmppd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m256d x;
+volatile __m128d xx;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm256_cmp_pd_mask (x, x, _CMP_FALSE_OQ);
+ m = _mm256_mask_cmp_pd_mask (m, x, x, _CMP_FALSE_OQ);
+ m = _mm_cmp_pd_mask (xx, xx, _CMP_FALSE_OQ);
+ m = _mm_mask_cmp_pd_mask (m, xx, xx, _CMP_FALSE_OQ);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcmppd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcmppd-2.c
new file mode 100644
index 00000000000..2c4937ffeb7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcmppd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcmppd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcmppd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcmpps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcmpps-1.c
new file mode 100644
index 00000000000..7bb6d42b450
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcmpps-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler "vcmpps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmpps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vcmpps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vcmpps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m256 x;
+volatile __m128 xx;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm256_cmp_ps_mask (x, x, _CMP_FALSE_OQ);
+ m = _mm256_mask_cmp_ps_mask (m, x, x, _CMP_FALSE_OQ);
+ m = _mm_cmp_ps_mask (xx, xx, _CMP_FALSE_OQ);
+ m = _mm_mask_cmp_ps_mask (m, xx, xx, _CMP_FALSE_OQ);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcmpps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcmpps-2.c
new file mode 100644
index 00000000000..6a726dfd814
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcmpps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcmpps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcmpps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcompresspd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcompresspd-1.c
new file mode 100644
index 00000000000..ad837a13511
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcompresspd-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcompresspd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcompresspd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcompresspd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcompresspd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcompresspd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcompresspd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+double *p;
+volatile __m256d x1;
+volatile __m128d x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_compress_pd (x1, m, x1);
+ x2 = _mm_mask_compress_pd (x2, m, x2);
+
+ x1 = _mm256_maskz_compress_pd (m, x1);
+ x2 = _mm_maskz_compress_pd (m, x2);
+
+ _mm256_mask_compressstoreu_pd (p, m, x1);
+ _mm_mask_compressstoreu_pd (p, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcompresspd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcompresspd-2.c
new file mode 100644
index 00000000000..eee9bfb447b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcompresspd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcompresspd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcompresspd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcompressps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcompressps-1.c
new file mode 100644
index 00000000000..785f5160b4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcompressps-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcompressps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcompressps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcompressps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcompressps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcompressps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcompressps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+float *p;
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_compress_ps (x1, m, x1);
+ x2 = _mm_mask_compress_ps (x2, m, x2);
+
+ x1 = _mm256_maskz_compress_ps (m, x1);
+ x2 = _mm_maskz_compress_ps (m, x2);
+
+ _mm256_mask_compressstoreu_ps (p, m, x1);
+ _mm_mask_compressstoreu_ps (p, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcompressps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcompressps-2.c
new file mode 100644
index 00000000000..67c7790436b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcompressps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcompressps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcompressps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2pd-1.c
new file mode 100644
index 00000000000..1c8cfd33fac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2pd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s;
+volatile __m256d res1;
+volatile __m128d res2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepi32_pd (res1, m, s);
+ res2 = _mm_mask_cvtepi32_pd (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepi32_pd (m, s);
+ res2 = _mm_maskz_cvtepi32_pd (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2pd-2.c
new file mode 100644
index 00000000000..8d45492799b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2pd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtdq2pd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtdq2pd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2ps-1.c
new file mode 100644
index 00000000000..82aa1cc58bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2ps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtdq2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s1;
+volatile __m128i s2;
+volatile __m256 res1;
+volatile __m128 res2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepi32_ps (res1, m, s1);
+ res2 = _mm_mask_cvtepi32_ps (res2, m, s2);
+
+ res1 = _mm256_maskz_cvtepi32_ps (m, s1);
+ res2 = _mm_maskz_cvtepi32_ps (m, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2ps-2.c
new file mode 100644
index 00000000000..d6751544ace
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtdq2ps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtdq2ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtdq2ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2dq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2dq-1.c
new file mode 100644
index 00000000000..168c6e37eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2dq-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtpd2dqy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2dqx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2dqy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2dqx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d s1;
+volatile __m128d s2;
+volatile __m128i res;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res = _mm256_mask_cvtpd_epi32 (res, m, s1);
+ res = _mm_mask_cvtpd_epi32 (res, m, s2);
+
+ res = _mm256_maskz_cvtpd_epi32 (m, s1);
+ res = _mm_maskz_cvtpd_epi32 (m, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2dq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2dq-2.c
new file mode 100644
index 00000000000..c16d528b44d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2dq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtpd2dq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtpd2dq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2ps-1.c
new file mode 100644
index 00000000000..b18582b0a6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2ps-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vcvtpd2psy\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2psy\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2psx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2psx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x;
+volatile __m128d xx;
+volatile __m128 y;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_cvtpd_ps (y, 4, x);
+ y = _mm256_maskz_cvtpd_ps (6, x);
+ y = _mm_mask_cvtpd_ps (y, 4, xx);
+ y = _mm_maskz_cvtpd_ps (6, xx);
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2ps-2.c
new file mode 100644
index 00000000000..854a3340316
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2ps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtpd2ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtpd2ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2qq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2qq-2.c
new file mode 100644
index 00000000000..1109d898c4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2qq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtpd2qq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtpd2qq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2udq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2udq-1.c
new file mode 100644
index 00000000000..7544b6bc9ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2udq-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtpd2udqy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2udqx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2udqy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2udqx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2udqy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtpd2udqx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d s1;
+volatile __m128d s2;
+volatile __m128i res;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res = _mm256_cvtpd_epu32 (s1);
+ res = _mm_cvtpd_epu32 (s2);
+
+ res = _mm256_mask_cvtpd_epu32 (res, m, s1);
+ res = _mm_mask_cvtpd_epu32 (res, m, s2);
+
+ res = _mm256_maskz_cvtpd_epu32 (m, s1);
+ res = _mm_maskz_cvtpd_epu32 (m, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2udq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2udq-2.c
new file mode 100644
index 00000000000..d949d430b1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2udq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtpd2udq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtpd2udq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2uqq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2uqq-2.c
new file mode 100644
index 00000000000..01648cd92d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtpd2uqq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtpd2uqq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtpd2uqq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtph2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtph2ps-1.c
new file mode 100644
index 00000000000..de22503c0df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtph2ps-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x;
+volatile __m256 y;
+volatile __m128i xx;
+volatile __m128 yy;
+
+void extern
+avx512bw_test (void)
+{
+ y = _mm256_mask_cvtph_ps (y, 4, x);
+ y = _mm256_maskz_cvtph_ps (6, x);
+ yy = _mm_mask_cvtph_ps (yy, 4, xx);
+ yy = _mm_maskz_cvtph_ps (6, xx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtph2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtph2ps-2.c
new file mode 100644
index 00000000000..5d2c1104eee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtph2ps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mf16c -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtph2ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtph2ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2dq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2dq-1.c
new file mode 100644
index 00000000000..a38521402b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2dq-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtps2dq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2dq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2dq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2dq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __m256i z1;
+volatile __m128i z2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z1 = _mm256_mask_cvtps_epi32 (z1, m, x1);
+ z1 = _mm256_maskz_cvtps_epi32 (m, x1);
+ z2 = _mm_mask_cvtps_epi32 (z2, m, x2);
+ z2 = _mm_maskz_cvtps_epi32 (m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2dq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2dq-2.c
new file mode 100644
index 00000000000..ba660ba9546
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2dq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtps2dq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtps2dq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2pd-1.c
new file mode 100644
index 00000000000..e7dfc2c7f73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2pd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtps2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 s;
+volatile __m256d res1;
+volatile __m128d res2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtps_pd (res1, m, s);
+ res2 = _mm_mask_cvtps_pd (res2, m, s);
+
+ res1 = _mm256_maskz_cvtps_pd (m, s);
+ res2 = _mm_maskz_cvtps_pd (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2pd-2.c
new file mode 100644
index 00000000000..641d86198e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2pd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtps2pd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtps2pd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-1.c
new file mode 100644
index 00000000000..28314ee9af5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x;
+volatile __m128i y;
+volatile __m128 xx;
+volatile __m128i yy;
+
+void extern
+avx512bw_test (void)
+{
+ y = _mm256_maskz_cvtps_ph (4, x, 0);
+ y = _mm256_mask_cvtps_ph (y, 2, x, 0);
+ yy = _mm_maskz_cvtps_ph (4, xx, 0);
+ yy = _mm_mask_cvtps_ph (yy, 2, xx, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-2.c
new file mode 100644
index 00000000000..3d9e4fbeb71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mf16c -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtps2ph-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtps2ph-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2qq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2qq-2.c
new file mode 100644
index 00000000000..876f9bec165
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2qq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtps2qq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtps2qq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2udq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2udq-1.c
new file mode 100644
index 00000000000..30d0337db2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2udq-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtps2udq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __m256i z1;
+volatile __m128i z2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z1 = _mm256_cvtps_epu32 (x1);
+ z1 = _mm256_mask_cvtps_epu32 (z1, m, x1);
+ z1 = _mm256_maskz_cvtps_epu32 (m, x1);
+ z2 = _mm_cvtps_epu32 (x2);
+ z2 = _mm_mask_cvtps_epu32 (z2, m, x2);
+ z2 = _mm_maskz_cvtps_epu32 (m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2udq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2udq-2.c
new file mode 100644
index 00000000000..6bb32728e85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2udq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtps2udq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtps2udq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2uqq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2uqq-2.c
new file mode 100644
index 00000000000..512022dd067
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2uqq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtps2uqq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtps2uqq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtqq2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtqq2pd-2.c
new file mode 100644
index 00000000000..9a85b269fc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtqq2pd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtqq2pd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtqq2pd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtqq2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtqq2ps-2.c
new file mode 100644
index 00000000000..1ed75fd40ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtqq2ps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtqq2ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtqq2ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2dq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2dq-1.c
new file mode 100644
index 00000000000..729f49d91fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2dq-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dqx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d s1;
+volatile __m128d s2;
+volatile __m128i res;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res = _mm256_mask_cvttpd_epi32 (res, m, s1);
+ res = _mm_mask_cvttpd_epi32 (res, m, s2);
+
+ res = _mm256_maskz_cvttpd_epi32 (m, s1);
+ res = _mm_maskz_cvttpd_epi32 (m, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2dq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2dq-2.c
new file mode 100644
index 00000000000..24fd9ad642f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2dq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvttpd2dq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvttpd2dq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2qq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2qq-2.c
new file mode 100644
index 00000000000..b09268037ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2qq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvttpd2qq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvttpd2qq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2udq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2udq-1.c
new file mode 100644
index 00000000000..1b0cd9dd52c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2udq-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqy\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2udqx\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d s1;
+volatile __m128d s2;
+volatile __m128i res;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res = _mm256_cvttpd_epu32 (s1);
+ res = _mm_cvttpd_epu32 (s2);
+
+ res = _mm256_mask_cvttpd_epu32 (res, m, s1);
+ res = _mm_mask_cvttpd_epu32 (res, m, s2);
+
+ res = _mm256_maskz_cvttpd_epu32 (m, s1);
+ res = _mm_maskz_cvttpd_epu32 (m, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2udq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2udq-2.c
new file mode 100644
index 00000000000..53f04207be0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2udq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvttpd2udq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvttpd2udq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2uqq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2uqq-2.c
new file mode 100644
index 00000000000..caf512ad950
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttpd2uqq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvttpd2uqq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvttpd2uqq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2dq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2dq-1.c
new file mode 100644
index 00000000000..1aa638b5f31
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2dq-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 s1;
+volatile __m128 s2;
+volatile __m256i res1;
+volatile __m128i res2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvttps_epi32 (res1, m, s1);
+ res1 = _mm256_maskz_cvttps_epi32 (m, s1);
+ res2 = _mm_mask_cvttps_epi32 (res2, m, s2);
+ res2 = _mm_maskz_cvttps_epi32 (m, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2dq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2dq-2.c
new file mode 100644
index 00000000000..173db3c002b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2dq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvttps2dq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvttps2dq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2qq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2qq-2.c
new file mode 100644
index 00000000000..f45db8f5c22
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2qq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvttps2qq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvttps2qq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2udq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2udq-1.c
new file mode 100644
index 00000000000..f40b18a9ade
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2udq-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2udq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 s1;
+volatile __m128 s2;
+volatile __m256i res1;
+volatile __m128i res2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_cvttps_epu32 (s1);
+ res1 = _mm256_mask_cvttps_epu32 (res1, m, s1);
+ res1 = _mm256_maskz_cvttps_epu32 (m, s1);
+ res2 = _mm_cvttps_epu32 (s2);
+ res2 = _mm_mask_cvttps_epu32 (res2, m, s2);
+ res2 = _mm_maskz_cvttps_epu32 (m, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2udq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2udq-2.c
new file mode 100644
index 00000000000..e8779ea8b9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2udq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvttps2udq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvttps2udq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2uqq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2uqq-2.c
new file mode 100644
index 00000000000..bb15ecd05db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvttps2uqq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvttps2uqq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvttps2uqq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2pd-1.c
new file mode 100644
index 00000000000..3355617b7bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2pd-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s;
+volatile __m256d res1;
+volatile __m128d res2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_cvtepu32_pd (s);
+ res2 = _mm_cvtepu32_pd (s);
+
+ res1 = _mm256_mask_cvtepu32_pd (res1, m, s);
+ res2 = _mm_mask_cvtepu32_pd (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepu32_pd (m, s);
+ res2 = _mm_maskz_cvtepu32_pd (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2pd-2.c
new file mode 100644
index 00000000000..698448500a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2pd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtudq2pd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtudq2pd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2ps-1.c
new file mode 100644
index 00000000000..ef468346372
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2ps-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtudq2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s1;
+volatile __m128i s2;
+volatile __m256 res1;
+volatile __m128 res2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_cvtepu32_ps (s1);
+ res2 = _mm_cvtepu32_ps (s2);
+
+ res1 = _mm256_mask_cvtepu32_ps (res1, m, s1);
+ res2 = _mm_mask_cvtepu32_ps (res2, m, s2);
+
+ res1 = _mm256_maskz_cvtepu32_ps (m, s1);
+ res2 = _mm_maskz_cvtepu32_ps (m, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2ps-2.c
new file mode 100644
index 00000000000..98731c68449
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtudq2ps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtudq2ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vcvtudq2ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtuqq2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtuqq2pd-2.c
new file mode 100644
index 00000000000..d803b32ab59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtuqq2pd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtuqq2pd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtuqq2pd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtuqq2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtuqq2ps-2.c
new file mode 100644
index 00000000000..c99919e09f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtuqq2ps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtuqq2ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vcvtuqq2ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vdbpsadbw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vdbpsadbw-2.c
new file mode 100644
index 00000000000..62c52ea8757
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vdbpsadbw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vdbpsadbw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vdbpsadbw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vdivpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vdivpd-1.c
new file mode 100644
index 00000000000..e840cdbecca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vdivpd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x256;
+volatile __m128d x128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_div_pd (x256, m, x256, x256);
+ x256 = _mm256_maskz_div_pd (m, x256, x256);
+ x128 = _mm_mask_div_pd (x128, m, x128, x128);
+ x128 = _mm_maskz_div_pd (m, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vdivpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vdivpd-2.c
new file mode 100644
index 00000000000..bf9c41524a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vdivpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vdivpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vdivpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vdivps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vdivps-1.c
new file mode 100644
index 00000000000..37b9aa5feac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vdivps-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vdivps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x256;
+volatile __m128 x128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_div_ps (x256, m, x256, x256);
+ x256 = _mm256_maskz_div_ps (m, x256, x256);
+ x128 = _mm_mask_div_ps (x128, m, x128, x128);
+ x128 = _mm_maskz_div_ps (m, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vdivps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vdivps-2.c
new file mode 100644
index 00000000000..629bd1dcad3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vdivps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vdivps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vdivps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vexpandpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vexpandpd-1.c
new file mode 100644
index 00000000000..03044c5c6ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vexpandpd-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+double *p;
+volatile __m256d x1;
+volatile __m128d x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_expand_pd (x1, m, x1);
+ x2 = _mm_mask_expand_pd (x2, m, x2);
+
+ x1 = _mm256_maskz_expand_pd (m, x1);
+ x2 = _mm_maskz_expand_pd (m, x2);
+
+ x1 = _mm256_mask_expandloadu_pd (x1, m, p);
+ x2 = _mm_mask_expandloadu_pd (x2, m, p);
+
+ x1 = _mm256_maskz_expandloadu_pd (m, p);
+ x2 = _mm_maskz_expandloadu_pd (m, p);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vexpandpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vexpandpd-2.c
new file mode 100644
index 00000000000..4cb6a228c7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vexpandpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vexpandpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vexpandpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vexpandps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vexpandps-1.c
new file mode 100644
index 00000000000..d6a05e916b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vexpandps-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+float *p;
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_expand_ps (x1, m, x1);
+ x2 = _mm_mask_expand_ps (x2, m, x2);
+
+ x1 = _mm256_maskz_expand_ps (m, x1);
+ x2 = _mm_maskz_expand_ps (m, x2);
+
+ x1 = _mm256_mask_expandloadu_ps (x1, m, p);
+ x2 = _mm_mask_expandloadu_ps (x2, m, p);
+
+ x1 = _mm256_maskz_expandloadu_ps (m, p);
+ x2 = _mm_maskz_expandloadu_ps (m, p);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vexpandps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vexpandps-2.c
new file mode 100644
index 00000000000..d9ccd4e9bba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vexpandps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vexpandps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vexpandps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vextractf32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vextractf32x4-1.c
new file mode 100644
index 00000000000..58148e01824
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vextractf32x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vextractf32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vextractf32x4\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vextractf32x4\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x;
+volatile __m128 y;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_extractf32x4_ps (x, 1);
+ y = _mm256_mask_extractf32x4_ps (y, 2, x, 1);
+ y = _mm256_maskz_extractf32x4_ps (2, x, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vextractf32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vextractf32x4-2.c
new file mode 100644
index 00000000000..c93d518eb07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vextractf32x4-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vextractf32x4-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vextractf64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vextractf64x2-2.c
new file mode 100644
index 00000000000..6123426fe3e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vextractf64x2-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vextractf64x2-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vextracti32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vextracti32x4-1.c
new file mode 100644
index 00000000000..fb26d6a6c8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vextracti32x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vextracti32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vextracti32x4\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vextracti32x4\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_extracti32x4_epi32 (x, 1);
+ y = _mm256_mask_extracti32x4_epi32 (y, 2, x, 1);
+ y = _mm256_maskz_extracti32x4_epi32 (2, x, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vextracti32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vextracti32x4-2.c
new file mode 100644
index 00000000000..d9c40906e08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vextracti32x4-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vextracti32x4-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vextracti64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vextracti64x2-2.c
new file mode 100644
index 00000000000..9b8554c7e72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vextracti64x2-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vextracti64x2-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmpd-1.c
new file mode 100644
index 00000000000..a95d83fefa7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmpd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler "vfixupimmpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" } } */
+/* { dg-final { scan-assembler "vfixupimmpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" } } */
+/* { dg-final { scan-assembler "vfixupimmpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" } } */
+/* { dg-final { scan-assembler "vfixupimmpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" } } */
+
+#include <immintrin.h>
+
+volatile __m256d xx;
+volatile __m256i yy;
+volatile __m128d x2;
+volatile __m128i y2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ xx = _mm256_fixupimm_pd (xx, xx, yy, 3);
+ xx = _mm256_mask_fixupimm_pd (xx, m, xx, yy, 3);
+ xx = _mm256_maskz_fixupimm_pd (m, xx, xx, yy, 3);
+ x2 = _mm_fixupimm_pd (x2, x2, y2, 3);
+ x2 = _mm_mask_fixupimm_pd (x2, m, x2, y2, 3);
+ x2 = _mm_maskz_fixupimm_pd (m, x2, x2, y2, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmpd-2.c
new file mode 100644
index 00000000000..36750096ee7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfixupimmpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfixupimmpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmps-1.c
new file mode 100644
index 00000000000..e6b6c5740c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmps-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler "vfixupimmps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" } } */
+/* { dg-final { scan-assembler "vfixupimmps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" } } */
+/* { dg-final { scan-assembler "vfixupimmps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" } } */
+/* { dg-final { scan-assembler "vfixupimmps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" } } */
+
+#include <immintrin.h>
+
+volatile __m256 xx;
+volatile __m256i yy;
+volatile __m128 x2;
+volatile __m128i y2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ xx = _mm256_fixupimm_ps (xx, xx, yy, 3);
+ xx = _mm256_mask_fixupimm_ps (xx, m, xx, yy, 3);
+ xx = _mm256_maskz_fixupimm_ps (m, xx, xx, yy, 3);
+ x2 = _mm_fixupimm_ps (x2, x2, y2, 3);
+ x2 = _mm_mask_fixupimm_ps (x2, m, x2, y2, 3);
+ x2 = _mm_maskz_fixupimm_ps (m, x2, x2, y2, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmps-2.c
new file mode 100644
index 00000000000..a37ad595430
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfixupimmps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfixupimmps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfixupimmps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXpd-1.c
new file mode 100644
index 00000000000..da6ea659800
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXpd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd231pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d yy, y2, y3;
+volatile __m128d xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fmadd_pd (yy, m, y2, y3);
+ xx = _mm_mask_fmadd_pd (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fmadd_pd (yy, y2, y3, m);
+ x3 = _mm_mask3_fmadd_pd (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fmadd_pd (m, yy, y2, y3);
+ xx = _mm_maskz_fmadd_pd (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXpd-2.c
new file mode 100644
index 00000000000..3fdb818411b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmaddXXXpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmaddXXXpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXps-1.c
new file mode 100644
index 00000000000..185a8296f24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXps-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd231ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd231ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 yy, y2, y3;
+volatile __m128 xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fmadd_ps (yy, m, y2, y3);
+ xx = _mm_mask_fmadd_ps (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fmadd_ps (yy, y2, y3, m);
+ x3 = _mm_mask3_fmadd_ps (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fmadd_ps (m, yy, y2, y3);
+ xx = _mm_maskz_fmadd_ps (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXps-2.c
new file mode 100644
index 00000000000..8d5dd7629cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddXXXps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmaddXXXps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmaddXXXps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXpd-1.c
new file mode 100644
index 00000000000..b0c015cba5d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXpd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub231pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub231pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d yy, y2, y3;
+volatile __m128d xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fmaddsub_pd (yy, m, y2, y3);
+ xx = _mm_mask_fmaddsub_pd (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fmaddsub_pd (yy, y2, y3, m);
+ x3 = _mm_mask3_fmaddsub_pd (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fmaddsub_pd (m, yy, y2, y3);
+ xx = _mm_maskz_fmaddsub_pd (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXpd-2.c
new file mode 100644
index 00000000000..178d43c0f99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmaddsubXXXpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmaddsubXXXpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXps-1.c
new file mode 100644
index 00000000000..25efa30d0fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXps-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub231ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub231ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 yy, y2, y3;
+volatile __m128 xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fmaddsub_ps (yy, m, y2, y3);
+ xx = _mm_mask_fmaddsub_ps (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fmaddsub_ps (yy, y2, y3, m);
+ x3 = _mm_mask3_fmaddsub_ps (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fmaddsub_ps (m, yy, y2, y3);
+ xx = _mm_maskz_fmaddsub_ps (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXps-2.c
new file mode 100644
index 00000000000..b148aef5ed8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmaddsubXXXps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmaddsubXXXps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmaddsubXXXps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXpd-1.c
new file mode 100644
index 00000000000..e0d52a5401a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXpd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub231pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d yy, y2, y3;
+volatile __m128d xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fmsub_pd (yy, m, y2, y3);
+ xx = _mm_mask_fmsub_pd (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fmsub_pd (yy, y2, y3, m);
+ x3 = _mm_mask3_fmsub_pd (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fmsub_pd (m, yy, y2, y3);
+ xx = _mm_maskz_fmsub_pd (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXpd-2.c
new file mode 100644
index 00000000000..6a973dc3383
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmsubXXXpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmsubXXXpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXps-1.c
new file mode 100644
index 00000000000..8e5ab4e380f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXps-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub231ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub231ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 yy, y2, y3;
+volatile __m128 xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fmsub_ps (yy, m, y2, y3);
+ xx = _mm_mask_fmsub_ps (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fmsub_ps (yy, y2, y3, m);
+ x3 = _mm_mask3_fmsub_ps (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fmsub_ps (m, yy, y2, y3);
+ xx = _mm_maskz_fmsub_ps (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXps-2.c
new file mode 100644
index 00000000000..e9059e2e97b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubXXXps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmsubXXXps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmsubXXXps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXpd-1.c
new file mode 100644
index 00000000000..7bf5324d9f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXpd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd231pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd231pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d yy, y2, y3;
+volatile __m128d xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fmsubadd_pd (yy, m, y2, y3);
+ xx = _mm_mask_fmsubadd_pd (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fmsubadd_pd (yy, y2, y3, m);
+ x3 = _mm_mask3_fmsubadd_pd (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fmsubadd_pd (m, yy, y2, y3);
+ xx = _mm_maskz_fmsubadd_pd (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXpd-2.c
new file mode 100644
index 00000000000..733416c6d67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmsubaddXXXpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmsubaddXXXpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXps-1.c
new file mode 100644
index 00000000000..c1af7cf84e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXps-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd231ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd231ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 yy, y2, y3;
+volatile __m128 xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fmsubadd_ps (yy, m, y2, y3);
+ xx = _mm_mask_fmsubadd_ps (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fmsubadd_ps (yy, y2, y3, m);
+ x3 = _mm_mask3_fmsubadd_ps (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fmsubadd_ps (m, yy, y2, y3);
+ xx = _mm_maskz_fmsubadd_ps (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXps-2.c
new file mode 100644
index 00000000000..eade1ca593a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfmsubaddXXXps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmsubaddXXXps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfmsubaddXXXps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXpd-1.c
new file mode 100644
index 00000000000..3335e64b69a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXpd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d yy, y2, y3;
+volatile __m128d xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fnmadd_pd (yy, m, y2, y3);
+ xx = _mm_mask_fnmadd_pd (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fnmadd_pd (yy, y2, y3, m);
+ x3 = _mm_mask3_fnmadd_pd (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fnmadd_pd (m, yy, y2, y3);
+ xx = _mm_maskz_fnmadd_pd (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXpd-2.c
new file mode 100644
index 00000000000..4f37d83eede
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfnmaddXXXpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfnmaddXXXpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXps-1.c
new file mode 100644
index 00000000000..8d30de40ac4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXps-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd231ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 yy, y2, y3;
+volatile __m128 xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fnmadd_ps (yy, m, y2, y3);
+ xx = _mm_mask_fnmadd_ps (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fnmadd_ps (yy, y2, y3, m);
+ x3 = _mm_mask3_fnmadd_ps (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fnmadd_ps (m, yy, y2, y3);
+ xx = _mm_maskz_fnmadd_ps (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXps-2.c
new file mode 100644
index 00000000000..72b722d1959
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmaddXXXps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfnmaddXXXps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfnmaddXXXps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXpd-1.c
new file mode 100644
index 00000000000..50ba7be10e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXpd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d yy, y2, y3;
+volatile __m128d xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fnmsub_pd (yy, m, y2, y3);
+ xx = _mm_mask_fnmsub_pd (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fnmsub_pd (yy, y2, y3, m);
+ x3 = _mm_mask3_fnmsub_pd (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fnmsub_pd (m, yy, y2, y3);
+ xx = _mm_maskz_fnmsub_pd (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXpd-2.c
new file mode 100644
index 00000000000..f2369677cdc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfnmsubXXXpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfnmsubXXXpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXps-1.c
new file mode 100644
index 00000000000..396fe24db7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXps-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub231ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 yy, y2, y3;
+volatile __m128 xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_fnmsub_ps (yy, m, y2, y3);
+ xx = _mm_mask_fnmsub_ps (xx, m, x2, x3);
+
+ y3 = _mm256_mask3_fnmsub_ps (yy, y2, y3, m);
+ x3 = _mm_mask3_fnmsub_ps (xx, x2, x3, m);
+
+ yy = _mm256_maskz_fnmsub_ps (m, yy, y2, y3);
+ xx = _mm_maskz_fnmsub_ps (m, xx, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXps-2.c
new file mode 100644
index 00000000000..1913e446646
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfnmsubXXXps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfnmsubXXXps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vfnmsubXXXps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfpclasspd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfpclasspd-2.c
new file mode 100644
index 00000000000..d9346c311b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfpclasspd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vfpclasspd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vfpclasspd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vfpclassps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vfpclassps-2.c
new file mode 100644
index 00000000000..9bf59b32ada
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vfpclassps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vfpclassps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vfpclassps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vgetexppd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vgetexppd-1.c
new file mode 100644
index 00000000000..b0b7dcca0e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vgetexppd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3} } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1} } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1} } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3} } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1} } */
+/* { dg-final { scan-assembler-times "vgetexppd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1} } */
+
+#include <immintrin.h>
+
+volatile __m256d xx;
+volatile __m128d x2;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ xx = _mm256_getexp_pd (xx);
+ xx = _mm256_mask_getexp_pd (xx, m8, xx);
+ xx = _mm256_maskz_getexp_pd (m8, xx);
+ x2 = _mm_getexp_pd (x2);
+ x2 = _mm_mask_getexp_pd (x2, m8, x2);
+ x2 = _mm_maskz_getexp_pd (m8, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vgetexppd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vgetexppd-2.c
new file mode 100644
index 00000000000..b8ebaf521b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vgetexppd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vgetexppd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vgetexppd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vgetexpps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vgetexpps-1.c
new file mode 100644
index 00000000000..4aa5bccd763
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vgetexpps-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3} } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1} } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1} } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3} } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1} } */
+/* { dg-final { scan-assembler-times "vgetexpps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1} } */
+
+#include <immintrin.h>
+
+volatile __m256 xx;
+volatile __m128 x2;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ xx = _mm256_getexp_ps (xx);
+ xx = _mm256_mask_getexp_ps (xx, m8, xx);
+ xx = _mm256_maskz_getexp_ps (m8, xx);
+ x2 = _mm_getexp_ps (x2);
+ x2 = _mm_mask_getexp_ps (x2, m8, x2);
+ x2 = _mm_maskz_getexp_ps (m8, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vgetexpps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vgetexpps-2.c
new file mode 100644
index 00000000000..643ccae84ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vgetexpps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vgetexpps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vgetexpps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantpd-1.c
new file mode 100644
index 00000000000..ad5ae3b0735
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantpd-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x, y;
+volatile __m128d a, b;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_getmant_pd (y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
+ x = _mm256_mask_getmant_pd (x, m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ x = _mm256_maskz_getmant_pd (m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ a = _mm_getmant_pd (b, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
+ a = _mm_mask_getmant_pd (a, m, b, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ a = _mm_maskz_getmant_pd (m, b, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantpd-2.c
new file mode 100644
index 00000000000..7007074ca86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vgetmantpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vgetmantpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantps-1.c
new file mode 100644
index 00000000000..089293546e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantps-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vgetmantps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x, y;
+volatile __m128 a, b;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_getmant_ps (y, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
+ x = _mm256_mask_getmant_ps (x, m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ x = _mm256_maskz_getmant_ps (m, y, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ a = _mm_getmant_ps (b, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
+ a = _mm_mask_getmant_ps (a, m, b, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+ a = _mm_maskz_getmant_ps (m, b, _MM_MANT_NORM_p75_1p5,
+ _MM_MANT_SIGN_src);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantps-2.c
new file mode 100644
index 00000000000..b76ca953379
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vgetmantps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vgetmantps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vgetmantps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vinsertf32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vinsertf32x4-1.c
new file mode 100644
index 00000000000..c0addd817d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vinsertf32x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vinsertf32x4\[^\n\]*ymm" 3 } } */
+/* { dg-final { scan-assembler-times "vinsertf32x4\[^\n\]*\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertf32x4\[^\n\]*\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x;
+volatile __m128 y;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_insertf32x4 (x, y, 1);
+ x = _mm256_mask_insertf32x4 (x, 2, x, y, 1);
+ x = _mm256_maskz_insertf32x4 (2, x, y, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vinsertf32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vinsertf32x4-2.c
new file mode 100644
index 00000000000..f6f98024996
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vinsertf32x4-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vinsertf32x4-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vinsertf64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vinsertf64x2-2.c
new file mode 100644
index 00000000000..6793449fdf0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vinsertf64x2-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vinsertf64x2-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vinserti32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vinserti32x4-1.c
new file mode 100644
index 00000000000..08e0897ab68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vinserti32x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\]*ymm" 3 } } */
+/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\]*\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vinserti32x4\[^\n\]*\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_inserti32x4 (x, y, 1);
+ x = _mm256_mask_inserti32x4 (x, 2, x, y, 1);
+ x = _mm256_maskz_inserti32x4 (2, x, y, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vinserti32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vinserti32x4-2.c
new file mode 100644
index 00000000000..03c29ae6e9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vinserti32x4-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vinserti32x4-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vinserti64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vinserti64x2-2.c
new file mode 100644
index 00000000000..faefdec8259
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vinserti64x2-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vinserti64x2-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmaxpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmaxpd-1.c
new file mode 100644
index 00000000000..c90b022ecd6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmaxpd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x256;
+volatile __m128d x128;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_max_pd (x256, m8, x256, x256);
+ x256 = _mm256_maskz_max_pd (m8, x256, x256);
+ x128 = _mm_mask_max_pd (x128, m8, x128, x128);
+ x128 = _mm_maskz_max_pd (m8, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmaxpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmaxpd-2.c
new file mode 100644
index 00000000000..40c878d7f28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmaxpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmaxpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmaxpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmaxps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmaxps-1.c
new file mode 100644
index 00000000000..666975677d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmaxps-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmaxps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x256;
+volatile __m128 x128;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_max_ps (x256, m8, x256, x256);
+ x256 = _mm256_maskz_max_ps (m8, x256, x256);
+ x128 = _mm_mask_max_ps (x128, m8, x128, x128);
+ x128 = _mm_maskz_max_ps (m8, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmaxps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmaxps-2.c
new file mode 100644
index 00000000000..d86addd9e68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmaxps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmaxps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmaxps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vminpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vminpd-1.c
new file mode 100644
index 00000000000..bda66f16e63
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vminpd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vminpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x256;
+volatile __m128d x128;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_min_pd (x256, m8, x256, x256);
+ x256 = _mm256_maskz_min_pd (m8, x256, x256);
+ x128 = _mm_mask_min_pd (x128, m8, x128, x128);
+ x128 = _mm_maskz_min_pd (m8, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vminpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vminpd-2.c
new file mode 100644
index 00000000000..deea0532727
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vminpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vminpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vminpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vminps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vminps-1.c
new file mode 100644
index 00000000000..7a27b15f31c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vminps-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vminps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x256;
+volatile __m128 x128;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_min_ps (x256, m8, x256, x256);
+ x256 = _mm256_maskz_min_ps (m8, x256, x256);
+ x128 = _mm_mask_min_ps (x128, m8, x128, x128);
+ x128 = _mm_maskz_min_ps (m8, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vminps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vminps-2.c
new file mode 100644
index 00000000000..b896a34ab5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vminps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vminps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vminps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c
new file mode 100644
index 00000000000..8a24a366119
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovapd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+double *p;
+volatile __m256d yy, y2;
+volatile __m128d xx, x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_mov_pd (yy, m, y2);
+ xx = _mm_mask_mov_pd (xx, m, x2);
+
+ yy = _mm256_maskz_mov_pd (m, y2);
+ xx = _mm_maskz_mov_pd (m, x2);
+
+ yy = _mm256_mask_load_pd (yy, m, p);
+ xx = _mm_mask_load_pd (xx, m, p);
+
+ yy = _mm256_maskz_load_pd (m, p);
+ xx = _mm_maskz_load_pd (m, p);
+
+ _mm256_mask_store_pd (p, m, yy);
+ _mm_mask_store_pd (p, m, xx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-2.c
new file mode 100644
index 00000000000..11d7af1ac2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovapd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovapd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovapd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c
new file mode 100644
index 00000000000..5104292f099
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovaps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+float __attribute__ ((aligned (32))) *p;
+volatile __m256 yy, y2;
+volatile __m128 xx, x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_mov_ps (yy, m, y2);
+ xx = _mm_mask_mov_ps (xx, m, x2);
+
+ yy = _mm256_maskz_mov_ps (m, y2);
+ xx = _mm_maskz_mov_ps (m, x2);
+
+ yy = _mm256_mask_load_ps (yy, m, p);
+ xx = _mm_mask_load_ps (xx, m, p);
+
+ yy = _mm256_maskz_load_ps (m, p);
+ xx = _mm_maskz_load_ps (m, p);
+
+ _mm256_mask_store_ps (p, m, yy);
+ _mm_mask_store_ps (p, m, xx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-2.c
new file mode 100644
index 00000000000..6d876e91f7d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovaps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovaps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovaps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovddup-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovddup-1.c
new file mode 100644
index 00000000000..9dd98de51d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovddup-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovddup\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vunpcklpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovddup\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]|vunpcklpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovddup\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}|vunpcklpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovddup\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}|vunpcklpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d yy, y2;
+volatile __m128d xx, x2;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_movedup_pd (yy, m8, y2);
+ yy = _mm256_maskz_movedup_pd (m8, y2);
+
+ xx = _mm_mask_movedup_pd (xx, m8, x2);
+ xx = _mm_maskz_movedup_pd (m8, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovddup-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovddup-2.c
new file mode 100644
index 00000000000..a5eaaafd00a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovddup-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovddup-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovddup-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c
new file mode 100644
index 00000000000..498232723e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa32\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+int *p;
+volatile __m256i yy, y2;
+volatile __m128i xx, x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_mov_epi32 (yy, m, y2);
+ xx = _mm_mask_mov_epi32 (xx, m, x2);
+
+ yy = _mm256_maskz_mov_epi32 (m, y2);
+ xx = _mm_maskz_mov_epi32 (m, x2);
+
+ yy = _mm256_mask_load_epi32 (yy, m, p);
+ xx = _mm_mask_load_epi32 (xx, m, p);
+
+ yy = _mm256_maskz_load_epi32 (m, p);
+ xx = _mm_maskz_load_epi32 (m, p);
+
+ _mm256_mask_store_epi32 (p, m, yy);
+ _mm_mask_store_epi32 (p, m, xx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-2.c
new file mode 100644
index 00000000000..0a0c90501ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa32-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovdqa32-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovdqa32-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
new file mode 100644
index 00000000000..fb80b245b50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^x^y\]*\\(" 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^x^y\]*\\(" 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m256i yy, y2;
+volatile __m128i xx, x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_mov_epi64 (yy, m, y2);
+ xx = _mm_mask_mov_epi64 (xx, m, x2);
+
+ yy = _mm256_maskz_mov_epi64 (m, y2);
+ xx = _mm_maskz_mov_epi64 (m, x2);
+
+ yy = _mm256_load_epi64 (p);
+ xx = _mm_load_epi64 (p);
+
+ yy = _mm256_mask_load_epi64 (yy, m, p);
+ xx = _mm_mask_load_epi64 (xx, m, p);
+
+ yy = _mm256_maskz_load_epi64 (m, p);
+ xx = _mm_maskz_load_epi64 (m, p);
+
+ _mm256_store_epi64 (p, yy);
+ _mm_store_epi64 (p, xx);
+
+ _mm256_mask_store_epi64 (p, m, yy);
+ _mm_mask_store_epi64 (p, m, xx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-2.c
new file mode 100644
index 00000000000..c404ea7e0e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovdqa64-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovdqa64-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu16-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu16-2.c
new file mode 100644
index 00000000000..35651e5ddd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu16-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vmovdqu16-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vmovdqu16-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu32-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu32-1.c
new file mode 100644
index 00000000000..ecffbc7f4e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu32-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+int *p;
+volatile __m256i x1;
+volatile __m128i x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_loadu_epi32 (x1, m, p);
+ x2 = _mm_mask_loadu_epi32 (x2, m, p);
+
+ x1 = _mm256_maskz_loadu_epi32 (m, p);
+ x2 = _mm_maskz_loadu_epi32 (m, p);
+
+ _mm256_mask_storeu_epi32 (p, m, x1);
+ _mm_mask_storeu_epi32 (p, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu32-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu32-2.c
new file mode 100644
index 00000000000..ffff00dd57e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu32-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovdqu32-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovdqu32-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu64-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu64-1.c
new file mode 100644
index 00000000000..9e3fb5884c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu64-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m256i x1;
+volatile __m128i x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_loadu_epi64 (x1, m, p);
+ x2 = _mm_mask_loadu_epi64 (x2, m, p);
+
+ x1 = _mm256_maskz_loadu_epi64 (m, p);
+ x2 = _mm_maskz_loadu_epi64 (m, p);
+
+ _mm256_mask_storeu_epi64 (p, m, x1);
+ _mm_mask_storeu_epi64 (p, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu64-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu64-2.c
new file mode 100644
index 00000000000..451edccaf55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu64-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovdqu64-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovdqu64-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu8-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu8-2.c
new file mode 100644
index 00000000000..6d14df55dbd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqu8-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vmovdqu8-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vmovdqu8-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovntdqa-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovntdqa-1.c
new file mode 100644
index 00000000000..9ac860f3c2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovntdqa-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vmovntdqa\[ \\t\]+\[^\n\]*%zmm\[0-9\]"} } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+__m512i *y;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm512_stream_load_si512 (y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovshdup-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovshdup-1.c
new file mode 100644
index 00000000000..80d88e0779f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovshdup-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovshdup\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovshdup\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovshdup\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovshdup\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 y;
+volatile __m128 x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_movehdup_ps (y, m, y);
+ x = _mm_mask_movehdup_ps (x, m, x);
+
+ y = _mm256_maskz_movehdup_ps (m, y);
+ x = _mm_maskz_movehdup_ps (m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovshdup-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovshdup-2.c
new file mode 100644
index 00000000000..d7b883de824
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovshdup-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovshdup-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovshdup-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovsldup-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovsldup-1.c
new file mode 100644
index 00000000000..147c9de1750
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovsldup-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovsldup\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsldup\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsldup\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsldup\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 y;
+volatile __m128 x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_moveldup_ps (y, m, y);
+ x = _mm_mask_moveldup_ps (x, m, x);
+
+ y = _mm256_maskz_moveldup_ps (m, y);
+ x = _mm_maskz_moveldup_ps (m, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovsldup-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovsldup-2.c
new file mode 100644
index 00000000000..877f1907acb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovsldup-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovsldup-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovsldup-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovupd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovupd-1.c
new file mode 100644
index 00000000000..3bd734d2497
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovupd-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovupd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+double *p;
+volatile __m256d x1;
+volatile __m128d x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_loadu_pd (x1, m, p);
+ x2 = _mm_mask_loadu_pd (x2, m, p);
+
+ x1 = _mm256_maskz_loadu_pd (m, p);
+ x2 = _mm_maskz_loadu_pd (m, p);
+
+ _mm256_mask_storeu_pd (p, m, x1);
+ _mm_mask_storeu_pd (p, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovupd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovupd-2.c
new file mode 100644
index 00000000000..f9ccc6a8f57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovupd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovupd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovupd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovups-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovups-1.c
new file mode 100644
index 00000000000..0f48eaed0bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovups-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*\\)\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*\\)\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+float *p;
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_loadu_ps (x1, m, p);
+ x2 = _mm_mask_loadu_ps (x2, m, p);
+
+ x1 = _mm256_maskz_loadu_ps (m, p);
+ x2 = _mm_maskz_loadu_ps (m, p);
+
+ _mm256_mask_storeu_ps (p, m, x1);
+ _mm_mask_storeu_ps (p, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovups-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovups-2.c
new file mode 100644
index 00000000000..fe49a6c8b42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovups-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovups-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmovups-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmulpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmulpd-1.c
new file mode 100644
index 00000000000..7346ee88273
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmulpd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x256;
+volatile __m128d x128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_mul_pd (x256, m, x256, x256);
+ x256 = _mm256_maskz_mul_pd (m, x256, x256);
+ x128 = _mm_mask_mul_pd (x128, m, x128, x128);
+ x128 = _mm_maskz_mul_pd (m, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmulpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmulpd-2.c
new file mode 100644
index 00000000000..ffd0862347e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmulpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmulpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmulpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmulps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmulps-1.c
new file mode 100644
index 00000000000..8e06e874db7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmulps-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmulps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x256;
+volatile __m128 x128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_mul_ps (x256, m, x256, x256);
+ x256 = _mm256_maskz_mul_ps (m, x256, x256);
+ x128 = _mm_mask_mul_ps (x128, m, x128, x128);
+ x128 = _mm_maskz_mul_ps (m, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmulps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmulps-2.c
new file mode 100644
index 00000000000..b0cf529ab6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmulps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmulps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vmulps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vorpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vorpd-2.c
new file mode 100644
index 00000000000..8cb5a2fdedc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vorpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vorpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vorpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vorps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vorps-2.c
new file mode 100644
index 00000000000..d628291cae1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vorps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vorps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vorps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpabsb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsb-2.c
new file mode 100644
index 00000000000..a1535d7f9b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpabsb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpabsb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpabsd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsd-1.c
new file mode 100644
index 00000000000..c8a7a0d50a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsd-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpabsd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i xx;
+volatile __m128i x2;
+
+void extern
+avx512vl_test (void)
+{
+ xx = _mm256_mask_abs_epi32 (xx, 2,xx);
+ xx = _mm256_maskz_abs_epi32 (2, xx);
+ x2 = _mm_mask_abs_epi32 (x2, 2, x2);
+ x2 = _mm_maskz_abs_epi32 (2, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpabsd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsd-2.c
new file mode 100644
index 00000000000..4106fe340bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpabsd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpabsd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpabsq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsq-1.c
new file mode 100644
index 00000000000..590d8ecda46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsq-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpabsq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpabsq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpabsq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpabsq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i xx;
+volatile __m128i x2;
+
+void extern
+avx512vl_test (void)
+{
+ xx = _mm256_abs_epi64 (xx);
+ xx = _mm256_mask_abs_epi64 (xx, 2, xx);
+ xx = _mm256_maskz_abs_epi64 (2, xx);
+ x2 = _mm_abs_epi64 (x2);
+ x2 = _mm_mask_abs_epi64 (x2, 2, x2);
+ x2 = _mm_maskz_abs_epi64 (2, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpabsq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsq-2.c
new file mode 100644
index 00000000000..4e9a54a402a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpabsq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpabsq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpabsw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsw-2.c
new file mode 100644
index 00000000000..a7bd947f285
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpabsw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpabsw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpabsw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpackssdw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpackssdw-2.c
new file mode 100644
index 00000000000..6bad76290e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpackssdw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpackssdw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpackssdw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpacksswb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpacksswb-2.c
new file mode 100644
index 00000000000..7c369e0b861
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpacksswb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpacksswb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpacksswb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpackusdw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpackusdw-2.c
new file mode 100644
index 00000000000..253adae9b72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpackusdw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpackusdw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpackusdw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpackuswb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpackuswb-2.c
new file mode 100644
index 00000000000..a052e60e0b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpackuswb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpackuswb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpackuswb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpaddb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddb-2.c
new file mode 100644
index 00000000000..07c9fd0ddb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpaddd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddd-1.c
new file mode 100644
index 00000000000..d468c06c191
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpaddd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_add_epi32 (x256, m8, x256, x256);
+ x256 = _mm256_maskz_add_epi32 (m8, x256, x256);
+ x128 = _mm_mask_add_epi32 (x128, m8, x128, x128);
+ x128 = _mm_maskz_add_epi32 (m8, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpaddd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddd-2.c
new file mode 100644
index 00000000000..3dc72ef3faf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpaddd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpaddd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpaddq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddq-1.c
new file mode 100644
index 00000000000..3b94fdd0b57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddq-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_add_epi64 (x256, m8, x256, x256);
+ x256 = _mm256_maskz_add_epi64 (m8, x256, x256);
+ x128 = _mm_mask_add_epi64 (x128, m8, x128, x128);
+ x128 = _mm_maskz_add_epi64 (m8, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpaddq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddq-2.c
new file mode 100644
index 00000000000..7fb63b757e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpaddq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpaddq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpaddsb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddsb-2.c
new file mode 100644
index 00000000000..f17892871cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddsb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddsb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddsb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpaddsw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddsw-2.c
new file mode 100644
index 00000000000..51b24a38dba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddsw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddsw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddsw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpaddusb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddusb-2.c
new file mode 100644
index 00000000000..122edcd9c4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddusb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddusb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddusb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpaddusw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddusw-2.c
new file mode 100644
index 00000000000..75726328621
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddusw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddusw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddusw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpaddw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddw-2.c
new file mode 100644
index 00000000000..a3a5db355b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpaddw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpaddw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpalignr-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpalignr-2.c
new file mode 100644
index 00000000000..0d30c652622
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpalignr-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpalignr-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpalignr-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpandd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpandd-1.c
new file mode 100644
index 00000000000..23cffba81c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpandd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpandd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpandd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpandd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpandd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_and_epi32 (y, m, y, y);
+ y = _mm256_maskz_and_epi32 (m, y, y);
+
+ x = _mm_mask_and_epi32 (x, m, x, x);
+ x = _mm_maskz_and_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpandd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpandd-2.c
new file mode 100644
index 00000000000..85a806b1e06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpandd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpandd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpandd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpandnd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpandnd-1.c
new file mode 100644
index 00000000000..e244cd899c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpandnd-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpandnd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpandnd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandnd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpandnd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpandnd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandnd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_andnot_epi32 (y, m, y, y);
+ y = _mm256_maskz_andnot_epi32 (m, y, y);
+
+ x = _mm_mask_andnot_epi32 (x, m, x, x);
+ x = _mm_maskz_andnot_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpandnd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpandnd-2.c
new file mode 100644
index 00000000000..8805d30d2f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpandnd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpandnd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpandnd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpandnq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpandnq-1.c
new file mode 100644
index 00000000000..cc171d1c298
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpandnq-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpandnq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpandnq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandnq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpandnq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpandnq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandnq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_andnot_epi64 (y, m, y, y);
+ y = _mm256_maskz_andnot_epi64 (m, y, y);
+
+ x = _mm_mask_andnot_epi64 (x, m, x, x);
+ x = _mm_maskz_andnot_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpandnq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpandnq-2.c
new file mode 100644
index 00000000000..da4169b40f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpandnq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpandnq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpandnq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpandq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpandq-1.c
new file mode 100644
index 00000000000..3922c9faa7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpandq-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpandq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpandq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpandq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpandq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpandq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_and_epi64 (y, m, y, y);
+ y = _mm256_maskz_and_epi64 (m, y, y);
+
+ x = _mm_mask_and_epi64 (x, m, x, x);
+ x = _mm_maskz_and_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpandq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpandq-2.c
new file mode 100644
index 00000000000..3579cf45e5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpandq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpandq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpandq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpavgb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpavgb-2.c
new file mode 100644
index 00000000000..ffcff26e12c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpavgb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpavgb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpavgb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpavgw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpavgw-2.c
new file mode 100644
index 00000000000..8ce32abcb0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpavgw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpavgw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpavgw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmb-2.c
new file mode 100644
index 00000000000..ff339726c03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpblendmb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpblendmb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmd-1.c
new file mode 100644
index 00000000000..4a62debab6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmd-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "(vpblendmd|vmovdqa32)\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "(vpblendmd|vmovdqa32)\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i xx;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_blend_epi32 (m, x, x);
+ xx = _mm_mask_blend_epi32 (m, xx, xx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmd-2.c
new file mode 100644
index 00000000000..b05227cbb8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpblendmd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpblendmd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmq-1.c
new file mode 100644
index 00000000000..dc23d17aa55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmq-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "(vpblendmq|vmovdqa64)\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "(vpblendmq|vmovdqa64)\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i xx;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_blend_epi64 (m, x, x);
+ xx = _mm_mask_blend_epi64 (m, xx, xx);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmq-2.c
new file mode 100644
index 00000000000..2b15de68a41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpblendmq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpblendmq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmw-2.c
new file mode 100644
index 00000000000..74c59aab49a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpblendmw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpblendmw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpblendmw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastb-2.c
new file mode 100644
index 00000000000..5f5575ac266
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpbroadcastb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpbroadcastb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastd-1.c
new file mode 100644
index 00000000000..c665163d005
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastd-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+%e\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+%e\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+%e\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[ \\t\]+%e\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 { target { ! { ia32 } } } } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile int z;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_broadcastd_epi32 (x, m, y);
+ x = _mm256_maskz_broadcastd_epi32 (m, y);
+ y = _mm_mask_broadcastd_epi32 (y, m, y);
+ y = _mm_maskz_broadcastd_epi32 (m, y);
+
+ x = _mm256_mask_set1_epi32 (x, m, z);
+ x = _mm256_maskz_set1_epi32 (m, z);
+ y = _mm_mask_set1_epi32 (y, m, z);
+ y = _mm_maskz_set1_epi32 (m, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastd-2.c
new file mode 100644
index 00000000000..bfa207f9153
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpbroadcastd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpbroadcastd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastmb2q-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastmb2q-1.c
new file mode 100644
index 00000000000..24172166131
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastmb2q-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512cd -O2" } */
+/* { dg-final { scan-assembler "vpbroadcastmb2q\[ \\t\]+\[^\n\]*k\[1-7\]\[^\n\]*%xmm\[0-7\]" } } */
+/* { dg-final { scan-assembler "vpbroadcastmb2q\[ \\t\]+\[^\n\]*k\[1-7\]\[^\n\]*%ymm\[0-7\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x128 = _mm_broadcastmb_epi64 (m8);
+ x256 = _mm256_broadcastmb_epi64 (m8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastmw2d-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastmw2d-1.c
new file mode 100644
index 00000000000..a6891b2fc8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastmw2d-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512cd -O2" } */
+/* { dg-final { scan-assembler "vpbroadcastmw2d\[ \\t\]+\[^\n\]*k\[1-7\]\[^\n\]*%xmm\[0-7\]" } } */
+/* { dg-final { scan-assembler "vpbroadcastmw2d\[ \\t\]+\[^\n\]*k\[1-7\]\[^\n\]*%ymm\[0-7\]" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __mmask16 m16;
+
+void extern
+avx512vl_test (void)
+{
+ x128 = _mm_broadcastmw_epi32 (m16);
+ x256 = _mm256_broadcastmw_epi32 (m16);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastq-1.c
new file mode 100644
index 00000000000..a957334a1c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastq-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+%r\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+%r\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+%r\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 { target { ! { ia32 } } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[ \\t\]+%r\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 { target { ! { ia32 } } } } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile long long z;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_broadcastq_epi64 (x, m, y);
+ x = _mm256_maskz_broadcastq_epi64 (m, y);
+ y = _mm_mask_broadcastq_epi64 (y, m, y);
+ y = _mm_maskz_broadcastq_epi64 (m, y);
+
+ x = _mm256_mask_set1_epi64 (x, m, z);
+ x = _mm256_maskz_set1_epi64 (m, z);
+ y = _mm_mask_set1_epi64 (y, m, z);
+ y = _mm_maskz_set1_epi64 (m, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastq-2.c
new file mode 100644
index 00000000000..f4880280ea0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpbroadcastq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpbroadcastq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastw-2.c
new file mode 100644
index 00000000000..981abef006b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpbroadcastw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpbroadcastw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpbroadcastw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpb-2.c
new file mode 100644
index 00000000000..7b8e413c164
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpd-1.c
new file mode 100644
index 00000000000..ce99466f1ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler "vpcmpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i xx;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm256_cmp_epi32_mask (x, x, _MM_CMPINT_EQ);
+ m = _mm256_mask_cmp_epi32_mask (m, x, x, _MM_CMPINT_EQ);
+ m = _mm_cmp_epi32_mask (xx, xx, _MM_CMPINT_EQ);
+ m = _mm_mask_cmp_epi32_mask (m, xx, xx, _MM_CMPINT_EQ);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpd-2.c
new file mode 100644
index 00000000000..957cb039128
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqb-2.c
new file mode 100644
index 00000000000..c5b068b071f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpeqb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpeqb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqd-1.c
new file mode 100644
index 00000000000..c4a3a55930a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcmpeqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm_cmpeq_epi32_mask (x128, x128);
+ m = _mm256_cmpeq_epi32_mask (x256, x256);
+ m = _mm_mask_cmpeq_epi32_mask (3, x128, x128);
+ m = _mm256_mask_cmpeq_epi32_mask (3, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqd-2.c
new file mode 100644
index 00000000000..fd6982ebe91
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpeqd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpeqd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqq-1.c
new file mode 100644
index 00000000000..65ef59206bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpcmpeqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpeqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpeqq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpeqq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm_cmpeq_epi64_mask (x128, x128);
+ m = _mm256_cmpeq_epi64_mask (x256, x256);
+ m = _mm_mask_cmpeq_epi64_mask (3, x128, x128);
+ m = _mm256_mask_cmpeq_epi64_mask (3, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqq-2.c
new file mode 100644
index 00000000000..b99ac1b050f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpeqq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpeqq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqw-2.c
new file mode 100644
index 00000000000..82cd9cbfc12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpeqw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpeqw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpeqw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpged-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpged-2.c
new file mode 100644
index 00000000000..4af12f25036
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpged-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpged-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpged-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeq-2.c
new file mode 100644
index 00000000000..ca5a3cbeb1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpgeq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpgeq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeud-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeud-2.c
new file mode 100644
index 00000000000..f9ad3d4ad1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeud-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpgeud-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpgeud-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeuq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeuq-2.c
new file mode 100644
index 00000000000..2ed2506c592
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeuq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpgeuq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpgeuq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtb-2.c
new file mode 100644
index 00000000000..143368a7166
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgtb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgtb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtd-1.c
new file mode 100644
index 00000000000..ba621ea694c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpcmpgtd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpgtd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpgtd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpgtd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm_cmpgt_epi32_mask (x128, x128);
+ m = _mm256_cmpgt_epi32_mask (x256, x256);
+ m = _mm_mask_cmpgt_epi32_mask (3, x128, x128);
+ m = _mm256_mask_cmpgt_epi32_mask (3, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtd-2.c
new file mode 100644
index 00000000000..99daeb413c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpgtd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpgtd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtq-1.c
new file mode 100644
index 00000000000..ce8cc4dc037
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vpcmpgtq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpgtq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpgtq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vpcmpgtq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%k\[1-7\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm_cmpgt_epi64_mask (x128, x128);
+ m = _mm256_cmpgt_epi64_mask (x256, x256);
+ m = _mm_mask_cmpgt_epi64_mask (3, x128, x128);
+ m = _mm256_mask_cmpgt_epi64_mask (3, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtq-2.c
new file mode 100644
index 00000000000..85f47be3d96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpgtq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpgtq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtw-2.c
new file mode 100644
index 00000000000..1d5fcae7dce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgtw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgtw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpgtw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpled-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpled-2.c
new file mode 100644
index 00000000000..756b836c39a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpled-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpled-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpled-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleq-2.c
new file mode 100644
index 00000000000..5a342713873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpleq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpleq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleud-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleud-2.c
new file mode 100644
index 00000000000..dc26cd24272
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleud-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpleud-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpleud-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleuq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleuq-2.c
new file mode 100644
index 00000000000..41ebcb9bfea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleuq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpleuq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpleuq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltd-2.c
new file mode 100644
index 00000000000..2e0e5d59b8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpltd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpltd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltq-2.c
new file mode 100644
index 00000000000..5e3d16c23a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpltq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpltq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltud-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltud-2.c
new file mode 100644
index 00000000000..59d45ce944d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltud-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpltud-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpltud-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltuq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltuq-2.c
new file mode 100644
index 00000000000..4380c6948a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltuq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpltuq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpltuq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqd-2.c
new file mode 100644
index 00000000000..2fe87e9ed26
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpneqd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpneqd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqq-2.c
new file mode 100644
index 00000000000..c0fb0291fa1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpneqq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpneqq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequd-2.c
new file mode 100644
index 00000000000..ac9326784e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpnequd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpnequd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequq-2.c
new file mode 100644
index 00000000000..649179f6e46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpnequq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpnequq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpq-1.c
new file mode 100644
index 00000000000..6be6171dacd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler "vpcmpq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i xx;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm256_cmp_epi64_mask (x, x, _MM_CMPINT_UNUSED);
+ m = _mm256_mask_cmp_epi64_mask (m, x, x, _MM_CMPINT_NE);
+ m = _mm_cmp_epi64_mask (xx, xx, _MM_CMPINT_NLT);
+ m = _mm_mask_cmp_epi64_mask (m, xx, xx, _MM_CMPINT_GE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpq-2.c
new file mode 100644
index 00000000000..7de841472af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpub-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpub-2.c
new file mode 100644
index 00000000000..4be301aab4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpub-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpub-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpud-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpud-1.c
new file mode 100644
index 00000000000..6de5a88be79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpud-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler "vpcmpud\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpud\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpud\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpud\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i xx;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm256_cmp_epu32_mask (x, x, _MM_CMPINT_LE);
+ m = _mm256_mask_cmp_epu32_mask (m, x, x, _MM_CMPINT_UNUSED);
+ m = _mm_cmp_epu32_mask (xx, xx, _MM_CMPINT_NE);
+ m = _mm_mask_cmp_epu32_mask (m, xx, xx, _MM_CMPINT_NLT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpud-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpud-2.c
new file mode 100644
index 00000000000..0df8f868e68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpud-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpud-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpud-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuq-1.c
new file mode 100644
index 00000000000..24b2da47ed2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler "vpcmpuq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpuq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+/* { dg-final { scan-assembler "vpcmpuq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vpcmpuq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n^k\]*%k\[1-7\]\{" } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i xx;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm256_cmp_epu64_mask (x, x, _MM_CMPINT_NLE);
+ m = _mm256_mask_cmp_epu64_mask (m, x, x, _MM_CMPINT_GT);
+ m = _mm_cmp_epu64_mask (xx, xx, _MM_CMPINT_EQ);
+ m = _mm_mask_cmp_epu64_mask (m, xx, xx, _MM_CMPINT_LT);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuq-2.c
new file mode 100644
index 00000000000..18896276787
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpuq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcmpuq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuw-2.c
new file mode 100644
index 00000000000..2b0ec73bbf7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpuw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpuw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpw-2.c
new file mode 100644
index 00000000000..fcd32b57d0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcmpw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpcmpw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressd-1.c
new file mode 100644
index 00000000000..af00ab0ac46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressd-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpcompressd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+int *p;
+volatile __m256i x1;
+volatile __m128i x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_compress_epi32 (x1, m, x1);
+ x2 = _mm_mask_compress_epi32 (x2, m, x2);
+
+ x1 = _mm256_maskz_compress_epi32 (m, x1);
+ x2 = _mm_maskz_compress_epi32 (m, x2);
+
+ _mm256_mask_compressstoreu_epi32 (p, m, x1);
+ _mm_mask_compressstoreu_epi32 (p, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressd-2.c
new file mode 100644
index 00000000000..f6f1b08bb0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcompressd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcompressd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressq-1.c
new file mode 100644
index 00000000000..0fb73ca64b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressq-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpcompressq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpcompressq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*\\)\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m256i x1;
+volatile __m128i x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_compress_epi64 (x1, m, x1);
+ x2 = _mm_mask_compress_epi64 (x2, m, x2);
+
+ x1 = _mm256_maskz_compress_epi64 (m, x1);
+ x2 = _mm_maskz_compress_epi64 (m, x2);
+
+ _mm256_mask_compressstoreu_epi64 (p, m, x1);
+ _mm_mask_compressstoreu_epi64 (p, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressq-2.c
new file mode 100644
index 00000000000..f9544eb1de7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcompressq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpcompressq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpconflictd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpconflictd-1.c
new file mode 100644
index 00000000000..db7a2e0c22c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpconflictd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512cd -O2" } */
+/* { dg-final { scan-assembler-times "vpconflictd\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpconflictd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpconflictd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpconflictd\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpconflictd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpconflictd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s;
+volatile __m256i res;
+volatile __m128i s2;
+volatile __m128i res2;
+
+void extern
+avx512vl_test (void)
+{
+ res = _mm256_conflict_epi32 (s);
+ res = _mm256_mask_conflict_epi32 (res, 2, s);
+ res = _mm256_maskz_conflict_epi32 (2, s);
+ res2 = _mm_conflict_epi32 (s2);
+ res2 = _mm_mask_conflict_epi32 (res2, 2, s2);
+ res2 = _mm_maskz_conflict_epi32 (2, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpconflictq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpconflictq-1.c
new file mode 100644
index 00000000000..b81684c8450
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpconflictq-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512cd -O2" } */
+/* { dg-final { scan-assembler-times "vpconflictq\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpconflictq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpconflictq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpconflictq\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpconflictq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpconflictq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s;
+volatile __m256i res;
+volatile __m128i s2;
+volatile __m128i res2;
+
+void extern
+avx512vl_test (void)
+{
+ res = _mm256_conflict_epi64 (s);
+ res = _mm256_mask_conflict_epi64 (res, 2, s);
+ res = _mm256_maskz_conflict_epi64 (2, s);
+ res2 = _mm_conflict_epi64 (s2);
+ res2 = _mm_mask_conflict_epi64 (res2, 2, s2);
+ res2 = _mm_maskz_conflict_epi64 (2, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c
new file mode 100644
index 00000000000..06135081af0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_maskz_permutexvar_epi32 (m, x, x);
+ x = _mm256_mask_permutexvar_epi32 (x, m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-2.c
new file mode 100644
index 00000000000..1d10e6ed215
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermd-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermd-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2d-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2d-1.c
new file mode 100644
index 00000000000..7211427a78e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2d-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermi2d\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermi2d\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x1;
+volatile __m128i x2;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask2_permutex2var_epi32 (x1, y, m, x1);
+ x2 = _mm_mask2_permutex2var_epi32 (x2, z, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2d-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2d-2.c
new file mode 100644
index 00000000000..090cb40000f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2d-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermi2d-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermi2d-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2pd-1.c
new file mode 100644
index 00000000000..7e1e971d174
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2pd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermi2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermi2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x1;
+volatile __m128d x2;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask2_permutex2var_pd (x1, y, m, x1);
+ x2 = _mm_mask2_permutex2var_pd (x2, z, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2pd-2.c
new file mode 100644
index 00000000000..a4ce1323e8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2pd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermi2pd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermi2pd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2ps-1.c
new file mode 100644
index 00000000000..ff3a2529e59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2ps-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermi2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermi2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask2_permutex2var_ps (x1, y, m, x1);
+ x2 = _mm_mask2_permutex2var_ps (x2, z, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2ps-2.c
new file mode 100644
index 00000000000..377ee1b74fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2ps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermi2ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermi2ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2q-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2q-1.c
new file mode 100644
index 00000000000..9edffe8bea5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2q-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermi2q\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermi2q\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x1;
+volatile __m128i x2;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask2_permutex2var_epi64 (x1, y, m, x1);
+ x2 = _mm_mask2_permutex2var_epi64 (x2, z, m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2q-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2q-2.c
new file mode 100644
index 00000000000..22418b31ac2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2q-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermi2q-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermi2q-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2w-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2w-2.c
new file mode 100644
index 00000000000..edac8cdf649
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermi2w-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpermi2w-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpermi2w-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpd-1.c
new file mode 100644
index 00000000000..8986e09bd25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d y;
+volatile __m256i c;
+volatile __m128d x;
+volatile __m128i k;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_permutevar_pd (y, m, y, c);
+ y = _mm256_maskz_permutevar_pd (m, y, c);
+ x = _mm_mask_permutevar_pd (x, m, x, k);
+ x = _mm_maskz_permutevar_pd (m, x, k);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpd-2.c
new file mode 100644
index 00000000000..bf2383094ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermilpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermilpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpdi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpdi-1.c
new file mode 100644
index 00000000000..0f84f40a3db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpdi-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*13\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*13\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*13\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*3\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*3\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilpd\[ \\t\]+\[^\n\]*3\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d y;
+volatile __m128d x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_permute_pd (y, m, y, 13);
+ y = _mm256_maskz_permute_pd (m, y, 13);
+ x = _mm_mask_permute_pd (x, m, x, 3);
+ x = _mm_maskz_permute_pd (m, x, 3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpdi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpdi-2.c
new file mode 100644
index 00000000000..5a2b4046288
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpdi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermilpdi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermilpdi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermilps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilps-1.c
new file mode 100644
index 00000000000..3bf91d62564
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilps-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 y;
+volatile __m128 x;
+volatile __m256i c;
+volatile __m128i k;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_permutevar_ps (y, m, y, c);
+ y = _mm256_maskz_permutevar_ps (m, y, c);
+ x = _mm_mask_permutevar_ps (x, m, x, k);
+ x = _mm_maskz_permutevar_ps (m, x, k);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermilps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilps-2.c
new file mode 100644
index 00000000000..df724c7db0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermilps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermilps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpsi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpsi-1.c
new file mode 100644
index 00000000000..503a65178e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpsi-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*13\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*13\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*13\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*13\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*13\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermilps\[ \\t\]+\[^\n\]*13\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 y;
+volatile __m128 x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_permute_ps (y, m, y, 13);
+ y = _mm256_maskz_permute_ps (m, y, 13);
+ x = _mm_mask_permute_ps (x, m, x, 13);
+ x = _mm_maskz_permute_ps (m, x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpsi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpsi-2.c
new file mode 100644
index 00000000000..c5e7cbff635
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermilpsi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermilpsi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermilpsi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermpd-1.c
new file mode 100644
index 00000000000..d2fde4bb256
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermpd-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m256d y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_permutexvar_pd (x, y);
+ y = _mm256_mask_permutexvar_pd (y, m, x, y);
+ y = _mm256_maskz_permutexvar_pd (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermpd-2.c
new file mode 100644
index 00000000000..84ae96626a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermpd-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermpd-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermpdi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermpdi-1.c
new file mode 100644
index 00000000000..aeedc5f2692
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermpdi-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_permutex_pd (x, 13);
+ x = _mm256_mask_permutex_pd (x, m, x, 13);
+ x = _mm256_maskz_permutex_pd (m, x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermpdi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermpdi-2.c
new file mode 100644
index 00000000000..c48043b8608
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermpdi-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermpdi-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermps-1.c
new file mode 100644
index 00000000000..9ed8c80359c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermps-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m256 y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_permutexvar_ps (x, y);
+ y = _mm256_mask_permutexvar_ps (y, m, x, y);
+ y = _mm256_maskz_permutexvar_ps (m, x, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermps-2.c
new file mode 100644
index 00000000000..e53b1777de7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermps-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermps-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c
new file mode 100644
index 00000000000..f949a760206
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_permutex_epi64 (x, m, x, 13);
+ x = _mm256_maskz_permutex_epi64 (m, x, 13);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-2.c
new file mode 100644
index 00000000000..ac7c671ba96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-imm-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermq-imm-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c
new file mode 100644
index 00000000000..b333e90073a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_maskz_permutexvar_epi64 (m, x, x);
+ x = _mm256_mask_permutexvar_epi64 (x, m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-2.c
new file mode 100644
index 00000000000..af096f52e49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermq-var-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermq-var-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c
new file mode 100644
index 00000000000..91930846cb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2d\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x1;
+volatile __m128i x2;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_permutex2var_epi32 (x1, y, x1);
+ x1 = _mm256_mask_permutex2var_epi32 (x1, m, y, x1);
+ x1 = _mm256_maskz_permutex2var_epi32 (m, x1, y, x1);
+ x2 = _mm_permutex2var_epi32 (x2, z, x2);
+ x2 = _mm_mask_permutex2var_epi32 (x2, m, z, x2);
+ x2 = _mm_maskz_permutex2var_epi32 (m, x2, z, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-2.c
new file mode 100644
index 00000000000..82e045c572a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2d-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermt2d-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermt2d-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c
new file mode 100644
index 00000000000..8a61fcfc26e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x1;
+volatile __m128d x2;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_permutex2var_pd (x1, y, x1);
+ x1 = _mm256_mask_permutex2var_pd (x1, m, y, x1);
+ x1 = _mm256_maskz_permutex2var_pd (m, x1, y, x1);
+ x2 = _mm_permutex2var_pd (x2, z, x2);
+ x2 = _mm_mask_permutex2var_pd (x2, m, z, x2);
+ x2 = _mm_maskz_permutex2var_pd (m, x2, z, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-2.c
new file mode 100644
index 00000000000..66681eb4da7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2pd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermt2pd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermt2pd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c
new file mode 100644
index 00000000000..57125e4a4c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_permutex2var_ps (x1, y, x1);
+ x1 = _mm256_mask_permutex2var_ps (x1, m, y, x1);
+ x1 = _mm256_maskz_permutex2var_ps (m, x1, y, x1);
+ x2 = _mm_permutex2var_ps (x2, z, x2);
+ x2 = _mm_mask_permutex2var_ps (x2, m, z, x2);
+ x2 = _mm_maskz_permutex2var_ps (m, x2, z, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-2.c
new file mode 100644
index 00000000000..cf6e0a8ee1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2ps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermt2ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermt2ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c
new file mode 100644
index 00000000000..ba76a81324d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpermt2q\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x1;
+volatile __m128i x2;
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_permutex2var_epi64 (x1, y, x1);
+ x1 = _mm256_mask_permutex2var_epi64 (x1, m, y, x1);
+ x1 = _mm256_maskz_permutex2var_epi64 (m, x1, y, x1);
+ x2 = _mm_permutex2var_epi64 (x2, z, x2);
+ x2 = _mm_mask_permutex2var_epi64 (x2, m, z, x2);
+ x2 = _mm_maskz_permutex2var_epi64 (m, x2, z, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-2.c
new file mode 100644
index 00000000000..998b1752d04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2q-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermt2q-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpermt2q-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2w-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2w-2.c
new file mode 100644
index 00000000000..bf33b6ab572
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermt2w-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpermt2w-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpermt2w-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpermw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpermw-2.c
new file mode 100644
index 00000000000..2f01064ecfc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpermw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpermw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpermw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandd-1.c
new file mode 100644
index 00000000000..aa68a5c400f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandd-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+int *p;
+volatile __m256i x1;
+volatile __m128i x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_expand_epi32 (x1, m, x1);
+ x2 = _mm_mask_expand_epi32 (x2, m, x2);
+
+ x1 = _mm256_maskz_expand_epi32 (m, x1);
+ x2 = _mm_maskz_expand_epi32 (m, x2);
+
+ x1 = _mm256_mask_expandloadu_epi32 (x1, m, p);
+ x2 = _mm_mask_expandloadu_epi32 (x2, m, p);
+
+ x1 = _mm256_maskz_expandloadu_epi32 (m, p);
+ x2 = _mm_maskz_expandloadu_epi32 (m, p);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandd-2.c
new file mode 100644
index 00000000000..c353d5a1311
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpexpandd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpexpandd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandq-1.c
new file mode 100644
index 00000000000..418e1b41e64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandq-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 2 } } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m256i x1;
+volatile __m128i x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_expand_epi64 (x1, m, x1);
+ x2 = _mm_mask_expand_epi64 (x2, m, x2);
+
+ x1 = _mm256_maskz_expand_epi64 (m, x1);
+ x2 = _mm_maskz_expand_epi64 (m, x2);
+
+ x1 = _mm256_mask_expandloadu_epi64 (x1, m, p);
+ x2 = _mm_mask_expandloadu_epi64 (x2, m, p);
+
+ x1 = _mm256_maskz_expandloadu_epi64 (m, p);
+ x2 = _mm_maskz_expandloadu_epi64 (m, p);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandq-2.c
new file mode 100644
index 00000000000..2b571b7faa2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpexpandq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpexpandq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vplzcntd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vplzcntd-1.c
new file mode 100644
index 00000000000..e93805d459b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vplzcntd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512cd -O2" } */
+/* { dg-final { scan-assembler-times "vplzcntd\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vplzcntd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vplzcntd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vplzcntd\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vplzcntd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vplzcntd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s;
+volatile __m256i res;
+volatile __m128i s2;
+volatile __m128i res2;
+
+void extern
+avx512vl_test (void)
+{
+ res = _mm256_lzcnt_epi32 (s);
+ res = _mm256_mask_lzcnt_epi32 (res, 2, s);
+ res = _mm256_maskz_lzcnt_epi32 (2, s);
+ res2 = _mm_lzcnt_epi32 (s2);
+ res2 = _mm_mask_lzcnt_epi32 (res2, 2, s2);
+ res2 = _mm_maskz_lzcnt_epi32 (2, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vplzcntq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vplzcntq-1.c
new file mode 100644
index 00000000000..ef8042ef3f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vplzcntq-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512cd -O2" } */
+/* { dg-final { scan-assembler-times "vplzcntq\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vplzcntq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vplzcntq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vplzcntq\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vplzcntq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vplzcntq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i s;
+volatile __m256i res;
+volatile __m128i s2;
+volatile __m128i res2;
+
+void extern
+avx512vl_test (void)
+{
+ res = _mm256_lzcnt_epi64 (s);
+ res = _mm256_maskz_lzcnt_epi64 (2, s);
+ res = _mm256_mask_lzcnt_epi64 (res, 2, s);
+ res2 = _mm_lzcnt_epi64 (s2);
+ res2 = _mm_maskz_lzcnt_epi64 (2, s2);
+ res2 = _mm_mask_lzcnt_epi64 (res2, 2, s2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddubsw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddubsw-2.c
new file mode 100644
index 00000000000..5eb756b1ebf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddubsw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaddubsw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaddubsw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddwd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddwd-2.c
new file mode 100644
index 00000000000..2e1b6c95518
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddwd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaddwd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaddwd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsb-2.c
new file mode 100644
index 00000000000..06c22a21616
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaxsb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaxsb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsd-1.c
new file mode 100644
index 00000000000..a2db4f0db85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxsd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_max_epi32 (x, m, x, x);
+ x = _mm256_maskz_max_epi32 (m, x, x);
+ y = _mm_mask_max_epi32 (y, m, y, y);
+ y = _mm_maskz_max_epi32 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsd-2.c
new file mode 100644
index 00000000000..ff1017afbd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmaxsd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmaxsd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsq-1.c
new file mode 100644
index 00000000000..7c1d669d8cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxsq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxsq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxsq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxsq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_max_epi64 (x, x);
+ x = _mm256_mask_max_epi64 (x, m, x, x);
+ x = _mm256_maskz_max_epi64 (m, x, x);
+ y = _mm_max_epi64 (y, y);
+ y = _mm_mask_max_epi64 (y, m, y, y);
+ y = _mm_maskz_max_epi64 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsq-2.c
new file mode 100644
index 00000000000..958e8d3190a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmaxsq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmaxsq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsw-2.c
new file mode 100644
index 00000000000..bc769aa7add
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxsw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaxsw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaxsw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxub-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxub-2.c
new file mode 100644
index 00000000000..90797d900e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxub-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaxub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaxub-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxud-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxud-1.c
new file mode 100644
index 00000000000..d2e6d1210a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxud-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxud\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxud\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxud\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxud\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_max_epu32 (x, m, x, x);
+ x = _mm256_maskz_max_epu32 (m, x, x);
+ y = _mm_mask_max_epu32 (y, m, y, y);
+ y = _mm_maskz_max_epu32 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxud-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxud-2.c
new file mode 100644
index 00000000000..f61e911fe60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxud-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmaxud-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmaxud-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuq-1.c
new file mode 100644
index 00000000000..7ff7768a1af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpmaxuq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxuq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxuq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxuq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpmaxuq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmaxuq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_max_epu64 (x, x);
+ x = _mm256_mask_max_epu64 (x, m, x, x);
+ x = _mm256_maskz_max_epu64 (m, x, x);
+ y = _mm_max_epu64 (y, y);
+ y = _mm_mask_max_epu64 (y, m, y, y);
+ y = _mm_maskz_max_epu64 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuq-2.c
new file mode 100644
index 00000000000..bb985567224
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmaxuq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmaxuq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuw-2.c
new file mode 100644
index 00000000000..65c78fd62c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaxuw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaxuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmaxuw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminsb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsb-2.c
new file mode 100644
index 00000000000..16282201365
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpminsb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpminsb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminsd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsd-1.c
new file mode 100644
index 00000000000..8a87dbae59b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpminsd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_min_epi32 (x, m, x, x);
+ x = _mm256_maskz_min_epi32 (m, x, x);
+ y = _mm_mask_min_epi32 (y, m, y, y);
+ y = _mm_maskz_min_epi32 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminsd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsd-2.c
new file mode 100644
index 00000000000..cc465e8492f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpminsd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpminsd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminsq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsq-1.c
new file mode 100644
index 00000000000..7a76c9de1de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpminsq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminsq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminsq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminsq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_min_epi64 (x, x);
+ x = _mm256_mask_min_epi64 (x, m, x, x);
+ x = _mm256_maskz_min_epi64 (m, x, x);
+ y = _mm_min_epi64 (y, y);
+ y = _mm_mask_min_epi64 (y, m, y, y);
+ y = _mm_maskz_min_epi64 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminsq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsq-2.c
new file mode 100644
index 00000000000..b0f68a0fe28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpminsq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpminsq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminsw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsw-2.c
new file mode 100644
index 00000000000..940a87f9eb3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminsw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpminsw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpminsw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminub-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminub-2.c
new file mode 100644
index 00000000000..d6f21456b6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminub-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpminub-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpminub-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminud-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminud-1.c
new file mode 100644
index 00000000000..7c55d8713e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminud-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpminud\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminud\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminud\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminud\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_min_epu32 (x, m, x, x);
+ x = _mm256_maskz_min_epu32 (m, x, x);
+ y = _mm_mask_min_epu32 (y, m, y, y);
+ y = _mm_maskz_min_epu32 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminud-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminud-2.c
new file mode 100644
index 00000000000..34a17f8e08a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminud-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpminud-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpminud-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminuq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminuq-1.c
new file mode 100644
index 00000000000..d2c997a04b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminuq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpminuq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_min_epu64 (x, x);
+ x = _mm256_mask_min_epu64 (x, m, x, x);
+ x = _mm256_maskz_min_epu64 (m, x, x);
+ y = _mm_min_epu64 (y, y);
+ y = _mm_mask_min_epu64 (y, m, y, y);
+ y = _mm_maskz_min_epu64 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminuq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminuq-2.c
new file mode 100644
index 00000000000..67f1e4afa8d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminuq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpminuq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpminuq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpminuw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpminuw-2.c
new file mode 100644
index 00000000000..71f0accb84c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpminuw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpminuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpminuw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovb2m-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovb2m-2.c
new file mode 100644
index 00000000000..d178049552f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovb2m-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovb2m-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovb2m-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovd2m-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovd2m-2.c
new file mode 100644
index 00000000000..bd76c212336
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovd2m-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vpmovd2m-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vpmovd2m-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdb-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdb-1.c
new file mode 100644
index 00000000000..5ea9418ab7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdb-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtepi32_epi8 (x);
+ z = _mm_mask_cvtepi32_epi8 (z, m, x);
+ z = _mm_maskz_cvtepi32_epi8 (m, x);
+ z = _mm256_cvtepi32_epi8 (y);
+ z = _mm256_mask_cvtepi32_epi8 (z, m, y);
+ z = _mm256_maskz_cvtepi32_epi8 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdb-2.c
new file mode 100644
index 00000000000..1d2f3adc2a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovdb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovdb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdw-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdw-1.c
new file mode 100644
index 00000000000..cba590ddcce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdw-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtepi32_epi16 (x);
+ z = _mm_mask_cvtepi32_epi16 (z, m, x);
+ z = _mm_maskz_cvtepi32_epi16 (m, x);
+ z = _mm256_cvtepi32_epi16 (y);
+ z = _mm256_mask_cvtepi32_epi16 (z, m, y);
+ z = _mm256_maskz_cvtepi32_epi16 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdw-2.c
new file mode 100644
index 00000000000..7ec76377f54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovdw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovdw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovdw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2b-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2b-2.c
new file mode 100644
index 00000000000..8783abd0feb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2b-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovm2b-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovm2b-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2d-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2d-2.c
new file mode 100644
index 00000000000..639d0cc4d01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2d-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vpmovm2d-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vpmovm2d-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2q-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2q-2.c
new file mode 100644
index 00000000000..cc316a8d0ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2q-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vpmovm2q-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vpmovm2q-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2w-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2w-2.c
new file mode 100644
index 00000000000..3b8eaa7b975
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovm2w-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovm2w-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovm2w-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovq2m-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovq2m-2.c
new file mode 100644
index 00000000000..dfcd7996385
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovq2m-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vpmovq2m-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vpmovq2m-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqb-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqb-1.c
new file mode 100644
index 00000000000..8cdca2dfa41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqb-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtepi64_epi8 (x);
+ z = _mm_mask_cvtepi64_epi8 (z, m, x);
+ z = _mm_maskz_cvtepi64_epi8 (m, x);
+ z = _mm256_cvtepi64_epi8 (y);
+ z = _mm256_mask_cvtepi64_epi8 (z, m, y);
+ z = _mm256_maskz_cvtepi64_epi8 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqb-2.c
new file mode 100644
index 00000000000..893d30bbae1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovqb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovqb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqd-1.c
new file mode 100644
index 00000000000..063d937c415
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtepi64_epi32 (x);
+ z = _mm_mask_cvtepi64_epi32 (z, m, x);
+ z = _mm_maskz_cvtepi64_epi32 (m, x);
+ z = _mm256_cvtepi64_epi32 (y);
+ z = _mm256_mask_cvtepi64_epi32 (z, m, y);
+ z = _mm256_maskz_cvtepi64_epi32 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqd-2.c
new file mode 100644
index 00000000000..2570919bef8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovqd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovqd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqw-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqw-1.c
new file mode 100644
index 00000000000..e3d6ddfe646
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqw-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtepi64_epi16 (x);
+ z = _mm_mask_cvtepi64_epi16 (z, m, x);
+ z = _mm_maskz_cvtepi64_epi16 (m, x);
+ z = _mm256_cvtepi64_epi16 (y);
+ z = _mm256_mask_cvtepi64_epi16 (z, m, y);
+ z = _mm256_maskz_cvtepi64_epi16 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqw-2.c
new file mode 100644
index 00000000000..0983ac57d57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovqw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovqw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovqw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdb-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdb-1.c
new file mode 100644
index 00000000000..876f0cfea94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdb-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtsepi32_epi8 (x);
+ z = _mm_mask_cvtsepi32_epi8 (z, m, x);
+ z = _mm_maskz_cvtsepi32_epi8 (m, x);
+ z = _mm256_cvtsepi32_epi8 (y);
+ z = _mm256_mask_cvtsepi32_epi8 (z, m, y);
+ z = _mm256_maskz_cvtsepi32_epi8 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdb-2.c
new file mode 100644
index 00000000000..2a73d23af61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsdb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsdb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdw-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdw-1.c
new file mode 100644
index 00000000000..ae2a45b8fdd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdw-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtsepi32_epi16 (x);
+ z = _mm_mask_cvtsepi32_epi16 (z, m, x);
+ z = _mm_maskz_cvtsepi32_epi16 (m, x);
+ z = _mm256_cvtsepi32_epi16 (y);
+ z = _mm256_mask_cvtsepi32_epi16 (z, m, y);
+ z = _mm256_maskz_cvtsepi32_epi16 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdw-2.c
new file mode 100644
index 00000000000..e1bd82abd04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsdw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsdw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsdw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqb-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqb-1.c
new file mode 100644
index 00000000000..68f70fdd3dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqb-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtsepi64_epi8 (x);
+ z = _mm_mask_cvtsepi64_epi8 (z, m, x);
+ z = _mm_maskz_cvtsepi64_epi8 (m, x);
+ z = _mm256_cvtsepi64_epi8 (y);
+ z = _mm256_mask_cvtsepi64_epi8 (z, m, y);
+ z = _mm256_maskz_cvtsepi64_epi8 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqb-2.c
new file mode 100644
index 00000000000..0ffc86c8256
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsqb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsqb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqd-1.c
new file mode 100644
index 00000000000..6ca1fe284c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtsepi64_epi32 (x);
+ z = _mm_mask_cvtsepi64_epi32 (z, m, x);
+ z = _mm_maskz_cvtsepi64_epi32 (m, x);
+ z = _mm256_cvtsepi64_epi32 (y);
+ z = _mm256_mask_cvtsepi64_epi32 (z, m, y);
+ z = _mm256_maskz_cvtsepi64_epi32 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqd-2.c
new file mode 100644
index 00000000000..7e9ed106c86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsqd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsqd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqw-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqw-1.c
new file mode 100644
index 00000000000..cca0d09a983
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqw-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtsepi64_epi16 (x);
+ z = _mm_mask_cvtsepi64_epi16 (z, m, x);
+ z = _mm_maskz_cvtsepi64_epi16 (m, x);
+ z = _mm256_cvtsepi64_epi16 (y);
+ z = _mm256_mask_cvtsepi64_epi16 (z, m, y);
+ z = _mm256_maskz_cvtsepi64_epi16 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqw-2.c
new file mode 100644
index 00000000000..d0dacb477c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsqw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsqw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsqw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovswb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovswb-2.c
new file mode 100644
index 00000000000..d2384d917c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovswb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovswb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovswb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbd-1.c
new file mode 100644
index 00000000000..e54b3f81da7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s, res2;
+volatile __m256i res1;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepi8_epi32 (res1, m, s);
+ res2 = _mm_mask_cvtepi8_epi32 (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepi8_epi32 (m, s);
+ res2 = _mm_maskz_cvtepi8_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbd-2.c
new file mode 100644
index 00000000000..1b36fd4c05b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsxbd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsxbd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbq-1.c
new file mode 100644
index 00000000000..095f532493f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbq-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s, res2;
+volatile __m256i res1;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepi8_epi64 (res1, m, s);
+ res2 = _mm_mask_cvtepi8_epi64 (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepi8_epi64 (m, s);
+ res2 = _mm_maskz_cvtepi8_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbq-2.c
new file mode 100644
index 00000000000..289ebd4cc0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsxbq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsxbq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbw-2.c
new file mode 100644
index 00000000000..71dca4eca76
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxbw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovsxbw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovsxbw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxdq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxdq-1.c
new file mode 100644
index 00000000000..c6ba11be5c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxdq-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s, res2;
+volatile __m256i res1;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepi32_epi64 (res1, m, s);
+ res2 = _mm_mask_cvtepi32_epi64 (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepi32_epi64 (m, s);
+ res2 = _mm_maskz_cvtepi32_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxdq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxdq-2.c
new file mode 100644
index 00000000000..8c564c1210d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxdq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsxdq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsxdq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwd-1.c
new file mode 100644
index 00000000000..69a019d05df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s, res2;
+volatile __m256i res1;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepi16_epi32 (res1, m, s);
+ res2 = _mm_mask_cvtepi16_epi32 (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepi16_epi32 (m, s);
+ res2 = _mm_maskz_cvtepi16_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwd-2.c
new file mode 100644
index 00000000000..e8d466ca4ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsxwd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsxwd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwq-1.c
new file mode 100644
index 00000000000..72ad8618354
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwq-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovsxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovsxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s, res2;
+volatile __m256i res1;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepi16_epi64 (res1, m, s);
+ res2 = _mm_mask_cvtepi16_epi64 (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepi16_epi64 (m, s);
+ res2 = _mm_maskz_cvtepi16_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwq-2.c
new file mode 100644
index 00000000000..cb4cf243334
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovsxwq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsxwq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovsxwq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdb-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdb-1.c
new file mode 100644
index 00000000000..81a308ff297
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdb-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovusdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtusepi32_epi8 (x);
+ z = _mm_mask_cvtusepi32_epi8 (z, m, x);
+ z = _mm_maskz_cvtusepi32_epi8 (m, x);
+ z = _mm256_cvtusepi32_epi8 (y);
+ z = _mm256_mask_cvtusepi32_epi8 (z, m, y);
+ z = _mm256_maskz_cvtusepi32_epi8 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdb-2.c
new file mode 100644
index 00000000000..4230463c1bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovusdb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovusdb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdw-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdw-1.c
new file mode 100644
index 00000000000..9fe534dfcee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdw-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovusdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusdw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtusepi32_epi16 (x);
+ z = _mm_mask_cvtusepi32_epi16 (z, m, x);
+ z = _mm_maskz_cvtusepi32_epi16 (m, x);
+ z = _mm256_cvtusepi32_epi16 (y);
+ z = _mm256_mask_cvtusepi32_epi16 (z, m, y);
+ z = _mm256_maskz_cvtusepi32_epi16 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdw-2.c
new file mode 100644
index 00000000000..db4fff646da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusdw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovusdw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovusdw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqb-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqb-1.c
new file mode 100644
index 00000000000..d64e81c2eda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqb-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovusqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqb\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqb\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtusepi64_epi8 (x);
+ z = _mm_mask_cvtusepi64_epi8 (z, m, x);
+ z = _mm_maskz_cvtusepi64_epi8 (m, x);
+ z = _mm256_cvtusepi64_epi8 (y);
+ z = _mm256_mask_cvtusepi64_epi8 (z, m, y);
+ z = _mm256_maskz_cvtusepi64_epi8 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqb-2.c
new file mode 100644
index 00000000000..644f1785714
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovusqb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovusqb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqd-1.c
new file mode 100644
index 00000000000..05d0bb5e2a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovusqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtusepi64_epi32 (x);
+ z = _mm_mask_cvtusepi64_epi32 (z, m, x);
+ z = _mm_maskz_cvtusepi64_epi32 (m, x);
+ z = _mm256_cvtusepi64_epi32 (y);
+ z = _mm256_mask_cvtusepi64_epi32 (z, m, y);
+ z = _mm256_maskz_cvtusepi64_epi32 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqd-2.c
new file mode 100644
index 00000000000..6ea1dacc285
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovusqd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovusqd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqw-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqw-1.c
new file mode 100644
index 00000000000..d98b6d56d9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqw-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovusqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqw\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovusqw\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x, z;
+volatile __m256i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ z = _mm_cvtusepi64_epi16 (x);
+ z = _mm_mask_cvtusepi64_epi16 (z, m, x);
+ z = _mm_maskz_cvtusepi64_epi16 (m, x);
+ z = _mm256_cvtusepi64_epi16 (y);
+ z = _mm256_mask_cvtusepi64_epi16 (z, m, y);
+ z = _mm256_maskz_cvtusepi64_epi16 (m, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqw-2.c
new file mode 100644
index 00000000000..a7a34af4076
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovusqw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovusqw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovusqw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovuswb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovuswb-2.c
new file mode 100644
index 00000000000..bd8b215c0d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovuswb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovuswb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovuswb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovw2m-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovw2m-2.c
new file mode 100644
index 00000000000..dd2da1d84d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovw2m-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovw2m-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovw2m-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovwb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovwb-2.c
new file mode 100644
index 00000000000..ed7c246d7f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovwb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovwb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovwb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbd-1.c
new file mode 100644
index 00000000000..23d6ed138b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s, res2;
+volatile __m256i res1;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepu8_epi32 (res1, m, s);
+ res2 = _mm_mask_cvtepu8_epi32 (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepu8_epi32 (m, s);
+ res2 = _mm_maskz_cvtepu8_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbd-2.c
new file mode 100644
index 00000000000..db10b72ca1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovzxbd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovzxbd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbq-1.c
new file mode 100644
index 00000000000..af642114afb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbq-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxbq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s, res2;
+volatile __m256i res1;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepu8_epi64 (res1, m, s);
+ res2 = _mm_mask_cvtepu8_epi64 (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepu8_epi64 (m, s);
+ res2 = _mm_maskz_cvtepu8_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbq-2.c
new file mode 100644
index 00000000000..bd193327563
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovzxbq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovzxbq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbw-2.c
new file mode 100644
index 00000000000..c24ebf7fb69
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxbw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovzxbw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmovzxbw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxdq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxdq-1.c
new file mode 100644
index 00000000000..4695b5545cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxdq-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s, res2;
+volatile __m256i res1;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepu32_epi64 (res1, m, s);
+ res2 = _mm_mask_cvtepu32_epi64 (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepu32_epi64 (m, s);
+ res2 = _mm_maskz_cvtepu32_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxdq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxdq-2.c
new file mode 100644
index 00000000000..c351c22aaa7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxdq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovzxdq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovzxdq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwd-1.c
new file mode 100644
index 00000000000..b18d2fa668a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s, res2;
+volatile __m256i res1;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepu16_epi32 (res1, m, s);
+ res2 = _mm_mask_cvtepu16_epi32 (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepu16_epi32 (m, s);
+ res2 = _mm_maskz_cvtepu16_epi32 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwd-2.c
new file mode 100644
index 00000000000..a8eef3e88b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovzxwd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovzxwd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwq-1.c
new file mode 100644
index 00000000000..4e624109f95
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwq-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmovzxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovzxwq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i s, res2;
+volatile __m256i res1;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ res1 = _mm256_mask_cvtepu16_epi64 (res1, m, s);
+ res2 = _mm_mask_cvtepu16_epi64 (res2, m, s);
+
+ res1 = _mm256_maskz_cvtepu16_epi64 (m, s);
+ res2 = _mm_maskz_cvtepu16_epi64 (m, s);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwq-2.c
new file mode 100644
index 00000000000..1be6cb5b9b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmovzxwq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovzxwq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmovzxwq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmuldq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmuldq-1.c
new file mode 100644
index 00000000000..9a57dd0614c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmuldq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmuldq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmuldq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmuldq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmuldq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_mul_epi32 (x, m, x, x);
+ x = _mm256_maskz_mul_epi32 (m, x, x);
+ y = _mm_mask_mul_epi32 (y, m, y, y);
+ y = _mm_maskz_mul_epi32 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmuldq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmuldq-2.c
new file mode 100644
index 00000000000..7d2eea55d2c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmuldq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmuldq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmuldq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmulhrsw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmulhrsw-2.c
new file mode 100644
index 00000000000..39841f5d774
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmulhrsw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmulhrsw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmulhrsw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmulhuw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmulhuw-2.c
new file mode 100644
index 00000000000..5f2dcb9eecd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmulhuw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmulhuw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmulhuw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmulhw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmulhw-2.c
new file mode 100644
index 00000000000..669ee7c98fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmulhw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmulhw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmulhw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmulld-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmulld-1.c
new file mode 100644
index 00000000000..6cc59e5964b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmulld-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmulld\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulld\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulld\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmulld\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i z;
+volatile __mmask8 myz;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_mullo_epi32 (y, myz, y, y);
+ y = _mm256_maskz_mullo_epi32 (myz, y, y);
+ z = _mm_mask_mullo_epi32 (z, myz, z, z);
+ z = _mm_maskz_mullo_epi32 (myz, z, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmulld-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmulld-2.c
new file mode 100644
index 00000000000..1ea4456a9f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmulld-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmulld-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmulld-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmullq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmullq-2.c
new file mode 100644
index 00000000000..36a77b133cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmullq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vpmullq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vpmullq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmullw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmullw-2.c
new file mode 100644
index 00000000000..f01b3c97986
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmullw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmullw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpmullw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmuludq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmuludq-1.c
new file mode 100644
index 00000000000..dfe0e2a12af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmuludq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpmuludq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmuludq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+/* { dg-final { scan-assembler-times "vpmuludq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpmuludq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}{z}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_mul_epu32 (x, m, x, x);
+ x = _mm256_maskz_mul_epu32 (m, x, x);
+ y = _mm_mask_mul_epu32 (y, m, y, y);
+ y = _mm_maskz_mul_epu32 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmuludq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmuludq-2.c
new file mode 100644
index 00000000000..cd2b3e6da55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmuludq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmuludq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpmuludq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpord-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpord-1.c
new file mode 100644
index 00000000000..01a0aa67dae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpord-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpord\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpord\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpord\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpord\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpord\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpord\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_or_epi32 (y, m, y, y);
+ y = _mm256_maskz_or_epi32 (m, y, y);
+
+ x = _mm_mask_or_epi32 (x, m, x, x);
+ x = _mm_maskz_or_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpord-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpord-2.c
new file mode 100644
index 00000000000..c81e1f763ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpord-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpord-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpord-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vporq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vporq-1.c
new file mode 100644
index 00000000000..b9a89a32be1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vporq-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vporq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vporq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vporq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vporq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vporq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vporq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_or_epi64 (y, m, y, y);
+ y = _mm256_maskz_or_epi64 (m, y, y);
+
+ x = _mm_mask_or_epi64 (x, m, x, x);
+ x = _mm_maskz_or_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vporq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vporq-2.c
new file mode 100644
index 00000000000..776181fdba2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vporq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vporq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vporq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprold-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprold-1.c
new file mode 100644
index 00000000000..22cb7c13c4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprold-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vprold\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprold\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprold\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vprold\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprold\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprold\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_rol_epi32 (x, 11);
+ x = _mm256_mask_rol_epi32 (x, m, x, 11);
+ x = _mm256_maskz_rol_epi32 (m, x, 11);
+
+ y = _mm_rol_epi32 (y, 12);
+ y = _mm_mask_rol_epi32 (y, m, y, 12);
+ y = _mm_maskz_rol_epi32 (m, y, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprold-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprold-2.c
new file mode 100644
index 00000000000..4c7b4ab629f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprold-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprold-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprold-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprolq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprolq-1.c
new file mode 100644
index 00000000000..24ef525eee5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprolq-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vprolq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprolq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprolq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vprolq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprolq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprolq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_rol_epi64 (x, 11);
+ x = _mm256_mask_rol_epi64 (x, m, x, 11);
+ x = _mm256_maskz_rol_epi64 (m, x, 11);
+
+ y = _mm_rol_epi64 (y, 12);
+ y = _mm_mask_rol_epi64 (y, m, y, 12);
+ y = _mm_maskz_rol_epi64 (m, y, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprolq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprolq-2.c
new file mode 100644
index 00000000000..cf1f98f2647
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprolq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprolq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprolq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprolvd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprolvd-1.c
new file mode 100644
index 00000000000..2f6b60451f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprolvd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vprolvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprolvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprolvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vprolvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprolvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprolvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_rolv_epi32 (x, x);
+ x = _mm256_mask_rolv_epi32 (x, m, x, x);
+ x = _mm256_maskz_rolv_epi32 (m, x, x);
+
+ y = _mm_rolv_epi32 (y, y);
+ y = _mm_mask_rolv_epi32 (y, m, y, y);
+ y = _mm_maskz_rolv_epi32 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprolvd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprolvd-2.c
new file mode 100644
index 00000000000..0f31644b0fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprolvd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprolvd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprolvd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprolvq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprolvq-1.c
new file mode 100644
index 00000000000..9a4e68084bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprolvq-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vprolvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprolvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprolvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vprolvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprolvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprolvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_rolv_epi64 (x, x);
+ x = _mm256_mask_rolv_epi64 (x, m, x, x);
+ x = _mm256_maskz_rolv_epi64 (m, x, x);
+
+ y = _mm_rolv_epi64 (y, y);
+ y = _mm_mask_rolv_epi64 (y, m, y, y);
+ y = _mm_maskz_rolv_epi64 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprolvq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprolvq-2.c
new file mode 100644
index 00000000000..b203c9fdd5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprolvq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprolvq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprolvq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprord-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprord-1.c
new file mode 100644
index 00000000000..e9fad895877
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprord-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vprord\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprord\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprord\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vprord\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprord\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprord\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_ror_epi32 (x, 11);
+ x = _mm256_mask_ror_epi32 (x, m, x, 11);
+ x = _mm256_maskz_ror_epi32 (m, x, 11);
+
+ y = _mm_ror_epi32 (y, 12);
+ y = _mm_mask_ror_epi32 (y, m, y, 12);
+ y = _mm_maskz_ror_epi32 (m, y, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprord-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprord-2.c
new file mode 100644
index 00000000000..6ae0e3ba6bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprord-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprord-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprord-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprorq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprorq-1.c
new file mode 100644
index 00000000000..1511ad60e64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprorq-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vprorq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprorq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprorq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vprorq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprorq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprorq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_ror_epi64 (x, 11);
+ x = _mm256_mask_ror_epi64 (x, m, x, 11);
+ x = _mm256_maskz_ror_epi64 (m, x, 11);
+
+ y = _mm_ror_epi64 (y, 12);
+ y = _mm_mask_ror_epi64 (y, m, y, 12);
+ y = _mm_maskz_ror_epi64 (m, y, 12);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprorq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprorq-2.c
new file mode 100644
index 00000000000..83aacd134e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprorq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprorq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprorq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprorvd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprorvd-1.c
new file mode 100644
index 00000000000..1c383adf790
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprorvd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vprorvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprorvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprorvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vprorvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprorvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprorvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_rorv_epi32 (x, x);
+ x = _mm256_mask_rorv_epi32 (x, m, x, x);
+ x = _mm256_maskz_rorv_epi32 (m, x, x);
+
+ y = _mm_rorv_epi32 (y, y);
+ y = _mm_mask_rorv_epi32 (y, m, y, y);
+ y = _mm_maskz_rorv_epi32 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprorvd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprorvd-2.c
new file mode 100644
index 00000000000..373beebb2ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprorvd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprorvd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprorvd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprorvq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprorvq-1.c
new file mode 100644
index 00000000000..58435058035
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprorvq-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vprorvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprorvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprorvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vprorvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vprorvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vprorvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_rorv_epi64 (x, x);
+ x = _mm256_mask_rorv_epi64 (x, m, x, x);
+ x = _mm256_maskz_rorv_epi64 (m, x, x);
+
+ y = _mm_rorv_epi64 (y, y);
+ y = _mm_mask_rorv_epi64 (y, m, y, y);
+ y = _mm_maskz_rorv_epi64 (m, y, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vprorvq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vprorvq-2.c
new file mode 100644
index 00000000000..781b62b8e12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vprorvq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprorvq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vprorvq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsadbw-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsadbw-1.c
new file mode 100644
index 00000000000..5ee60602ab4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsadbw-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler "vpsadbw\[ \\t\]+\[^\n\]*%zmm\[0-9\]"} } */
+
+#include <immintrin.h>
+
+volatile __m512i x;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm512_sad_epu8 (x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshufb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshufb-2.c
new file mode 100644
index 00000000000..85133e3a6c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshufb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpshufb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpshufb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshufd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshufd-1.c
new file mode 100644
index 00000000000..d7f6380be5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshufd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpshufd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_shuffle_epi32 (x, m, x, _MM_PERM_AADB);
+ x = _mm256_maskz_shuffle_epi32 (m, x, _MM_PERM_AADB);
+ y = _mm_mask_shuffle_epi32 (y, m, y, _MM_PERM_AADB);
+ y = _mm_maskz_shuffle_epi32 (m, y, _MM_PERM_AADB);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshufd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshufd-2.c
new file mode 100644
index 00000000000..54223fd4dd4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshufd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpshufd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpshufd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshufhw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshufhw-2.c
new file mode 100644
index 00000000000..a65ec081c1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshufhw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpshufhw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpshufhw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshuflw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshuflw-2.c
new file mode 100644
index 00000000000..6d534f154be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshuflw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpshuflw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpshuflw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpslld-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpslld-1.c
new file mode 100644
index 00000000000..f6e9e24423b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpslld-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m256;
+volatile __mmask8 m128;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_sll_epi32 (x256, m256, x256, x128);
+ x256 = _mm256_maskz_sll_epi32 (m256, x256, x128);
+ x128 = _mm_mask_sll_epi32 (x128, m128, x128, x128);
+ x128 = _mm_maskz_sll_epi32 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpslld-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpslld-2.c
new file mode 100644
index 00000000000..681feb04ccb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpslld-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpslld-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpslld-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpslldi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpslldi-1.c
new file mode 100644
index 00000000000..ad049c012c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpslldi-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpslld\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m256;
+volatile __mmask8 m128;
+#define y 7
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_slli_epi32 (x256, m256, x256, y);
+ x256 = _mm256_maskz_slli_epi32 (m256, x256, y);
+ x128 = _mm_mask_slli_epi32 (x128, m128, x128, y);
+ x128 = _mm_maskz_slli_epi32 (m128, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpslldi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpslldi-2.c
new file mode 100644
index 00000000000..31baa684d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpslldi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpslldi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpslldi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllq-1.c
new file mode 100644
index 00000000000..7a703cf2ad5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllq-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 4 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m256;
+volatile __mmask8 m128;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_sll_epi64 (x256, m256, x256, x128);
+ x256 = _mm256_maskz_sll_epi64 (m256, x256, x128);
+ x128 = _mm_mask_sll_epi64 (x128, m128, x128, x128);
+ x128 = _mm_maskz_sll_epi64 (m128, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllq-2.c
new file mode 100644
index 00000000000..135292f3576
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsllq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsllq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllqi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllqi-1.c
new file mode 100644
index 00000000000..6a38a410e73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllqi-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m256;
+volatile __mmask8 m128;
+#define y 7
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_slli_epi64 (x256, m256, x256, y);
+ x256 = _mm256_maskz_slli_epi64 (m256, x256, y);
+ x128 = _mm_mask_slli_epi64 (x128, m128, x128, y);
+ x128 = _mm_maskz_slli_epi64 (m128, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllqi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllqi-2.c
new file mode 100644
index 00000000000..764df718a02
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllqi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsllqi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsllqi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvd-1.c
new file mode 100644
index 00000000000..dc5947f08d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsllvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsllvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsllvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256, y256;
+volatile __m128i x128, y128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_sllv_epi32 (x256, m, x256, y256);
+ x256 = _mm256_maskz_sllv_epi32 (m, x256, y256);
+ x128 = _mm_mask_sllv_epi32 (x128, m, x128, y128);
+ x128 = _mm_maskz_sllv_epi32 (m, x128, y128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvd-2.c
new file mode 100644
index 00000000000..f1281d33926
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsllvd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsllvd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvq-1.c
new file mode 100644
index 00000000000..f9d28c7facb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvq-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsllvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsllvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsllvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsllvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256, y256;
+volatile __m128i x128, y128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_sllv_epi64 (x256, m, x256, y256);
+ x256 = _mm256_maskz_sllv_epi64 (m, x256, y256);
+ x128 = _mm_mask_sllv_epi64 (x128, m, x128, y128);
+ x128 = _mm_maskz_sllv_epi64 (m, x128, y128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvq-2.c
new file mode 100644
index 00000000000..19b087b0446
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsllvq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsllvq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvw-2.c
new file mode 100644
index 00000000000..1b49e2b0407
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllvw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsllvw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsllvw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllw-2.c
new file mode 100644
index 00000000000..dc5a099ab7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsllw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsllw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsllwi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllwi-2.c
new file mode 100644
index 00000000000..a4b03ee02e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsllwi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsllwi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsllwi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-1.c
new file mode 100644
index 00000000000..bd71aa5054a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_sra_epi32 (x256, m, x256, y);
+ x256 = _mm256_maskz_sra_epi32 (m, x256, y);
+ x128 = _mm_mask_sra_epi32 (x128, m, x128, y);
+ x128 = _mm_maskz_sra_epi32 (m, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-2.c
new file mode 100644
index 00000000000..86abe6c2148
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrad-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrad-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsradi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsradi-1.c
new file mode 100644
index 00000000000..a732be5bedb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsradi-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+#define y 7
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_srai_epi32 (x256, m, x256, y);
+ x256 = _mm256_maskz_srai_epi32 (m, x256, y);
+ x128 = _mm_mask_srai_epi32 (x128, m, x128, y);
+ x128 = _mm_maskz_srai_epi32 (m, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsradi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsradi-2.c
new file mode 100644
index 00000000000..b77874583c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsradi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsradi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsradi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsraq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsraq-1.c
new file mode 100644
index 00000000000..e1d754358e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsraq-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_sra_epi64 (x256, y);
+ x256 = _mm256_mask_sra_epi64 (x256, m, x256, y);
+ x256 = _mm256_maskz_sra_epi64 (m, x256, y);
+ x128 = _mm_sra_epi64 (x128, y);
+ x128 = _mm_mask_sra_epi64 (x128, m, x128, y);
+ x128 = _mm_maskz_sra_epi64 (m, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsraq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsraq-2.c
new file mode 100644
index 00000000000..3331f6b2e3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsraq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsraq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsraq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsraqi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsraqi-1.c
new file mode 100644
index 00000000000..27a9e39d2c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsraqi-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsraq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+#define y 7
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_srai_epi64 (x256, y);
+ x256 = _mm256_mask_srai_epi64 (x256, m, x256, y);
+ x256 = _mm256_maskz_srai_epi64 (m, x256, y);
+ x128 = _mm_srai_epi64 (x128, y);
+ x128 = _mm_mask_srai_epi64 (x128, m, x128, y);
+ x128 = _mm_maskz_srai_epi64 (m, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsraqi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsraqi-2.c
new file mode 100644
index 00000000000..25b6c44e8b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsraqi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsraqi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsraqi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsravd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsravd-1.c
new file mode 100644
index 00000000000..46d99a35621
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsravd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsravd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsravd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsravd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256, y256;
+volatile __m128i x128, y128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_srav_epi32 (x256, m, x256, y256);
+ x256 = _mm256_maskz_srav_epi32 (m, x256, y256);
+ x128 = _mm_mask_srav_epi32 (x128, m, x128, y128);
+ x128 = _mm_maskz_srav_epi32 (m, x128, y128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsravd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsravd-2.c
new file mode 100644
index 00000000000..da8c2afdaa7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsravd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsravd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsravd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsravq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsravq-1.c
new file mode 100644
index 00000000000..07e51b255f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsravq-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsravq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsravq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsravq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsravq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256, y256;
+volatile __m128i x128, y128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_srav_epi64 (x256, y256);
+ x256 = _mm256_mask_srav_epi64 (x256, m, x256, y256);
+ x256 = _mm256_maskz_srav_epi64 (m, x256, y256);
+ x128 = _mm_srav_epi64 (x128, y128);
+ x128 = _mm_mask_srav_epi64 (x128, m, x128, y128);
+ x128 = _mm_maskz_srav_epi64 (m, x128, y128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsravq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsravq-2.c
new file mode 100644
index 00000000000..ae95fa3eb8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsravq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsravq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsravq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsravw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsravw-2.c
new file mode 100644
index 00000000000..74ce9e04d8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsravw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsravw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsravw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsraw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsraw-2.c
new file mode 100644
index 00000000000..e6fdc9000c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsraw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsraw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsraw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrawi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrawi-2.c
new file mode 100644
index 00000000000..6ce2c923eff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrawi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsrawi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsrawi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrld-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrld-1.c
new file mode 100644
index 00000000000..812580302b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrld-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_srl_epi32 (x256, m, x256, y);
+ x256 = _mm256_maskz_srl_epi32 (m, x256, y);
+ x128 = _mm_mask_srl_epi32 (x128, m, x128, y);
+ x128 = _mm_maskz_srl_epi32 (m, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrld-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrld-2.c
new file mode 100644
index 00000000000..1c420f170f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrld-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrld-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrld-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrldi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrldi-1.c
new file mode 100644
index 00000000000..cd33ddd50c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrldi-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+#define y 7
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_srli_epi32 (x256, m, x256, y);
+ x256 = _mm256_maskz_srli_epi32 (m, x256, y);
+ x128 = _mm_mask_srli_epi32 (x128, m, x128, y);
+ x128 = _mm_maskz_srli_epi32 (m, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrldi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrldi-2.c
new file mode 100644
index 00000000000..86ff3ab787a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrldi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrldi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrldi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlq-1.c
new file mode 100644
index 00000000000..995e14f9888
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlq-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __m128i y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_srl_epi64 (x256, y);
+ x256 = _mm256_mask_srl_epi64 (x256, m, x256, y);
+ x256 = _mm256_maskz_srl_epi64 (m, x256, y);
+ x128 = _mm_srl_epi64 (x128, y);
+ x128 = _mm_mask_srl_epi64 (x128, m, x128, y);
+ x128 = _mm_maskz_srl_epi64 (m, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlq-2.c
new file mode 100644
index 00000000000..db6a3d18012
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrlq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrlq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlqi-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlqi-1.c
new file mode 100644
index 00000000000..5479e4be49e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlqi-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlq\[ \\t\]+\[^\n\]*, %xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+#define y 7
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_srli_epi64 (x256, y);
+ x256 = _mm256_mask_srli_epi64 (x256, m, x256, y);
+ x256 = _mm256_maskz_srli_epi64 (m, x256, y);
+ x128 = _mm_srli_epi64 (x128, y);
+ x128 = _mm_mask_srli_epi64 (x128, m, x128, y);
+ x128 = _mm_maskz_srli_epi64 (m, x128, y);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlqi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlqi-2.c
new file mode 100644
index 00000000000..25a1f243f53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlqi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrlqi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrlqi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvd-1.c
new file mode 100644
index 00000000000..828f911b2ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsrlvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrlvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrlvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256, y256;
+volatile __m128i x128, y128;
+volatile __mmask16 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_srlv_epi32 (x256, m, x256, y256);
+ x256 = _mm256_maskz_srlv_epi32 (m, x256, y256);
+ x128 = _mm_mask_srlv_epi32 (x128, m, x128, y128);
+ x128 = _mm_maskz_srlv_epi32 (m, x128, y128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvd-2.c
new file mode 100644
index 00000000000..dfbf3647548
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrlvd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrlvd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvq-1.c
new file mode 100644
index 00000000000..e68f0db33c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvq-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsrlvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrlvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrlvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrlvq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256, y256;
+volatile __m128i x128, y128;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_srlv_epi64 (x256, m, x256, y256);
+ x256 = _mm256_maskz_srlv_epi64 (m, x256, y256);
+ x128 = _mm_mask_srlv_epi64 (x128, m, x128, y128);
+ x128 = _mm_maskz_srlv_epi64 (m, x128, y128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvq-2.c
new file mode 100644
index 00000000000..e9640a7346b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrlvq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsrlvq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvw-2.c
new file mode 100644
index 00000000000..8209fdde685
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlvw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsrlvw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsrlvw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlw-2.c
new file mode 100644
index 00000000000..d8a4894ee07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsrlw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsrlw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlwi-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlwi-2.c
new file mode 100644
index 00000000000..4d973e83716
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsrlwi-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsrlwi-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsrlwi-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsubb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubb-2.c
new file mode 100644
index 00000000000..8c1e8dca13b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsubd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubd-1.c
new file mode 100644
index 00000000000..14cbf7e69d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubd-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsubd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_sub_epi32 (x256, m8, x256, x256);
+ x256 = _mm256_maskz_sub_epi32 (m8, x256, x256);
+ x128 = _mm_mask_sub_epi32 (x128, m8, x128, x128);
+ x128 = _mm_maskz_sub_epi32 (m8, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsubd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubd-2.c
new file mode 100644
index 00000000000..f16988f1ae6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsubd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsubd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsubq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubq-1.c
new file mode 100644
index 00000000000..b11ce0a4168
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubq-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpsubq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpsubq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpsubq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x256;
+volatile __m128i x128;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_mask_sub_epi64 (x256, m8, x256, x256);
+ x256 = _mm256_maskz_sub_epi64 (m8, x256, x256);
+ x128 = _mm_mask_sub_epi64 (x128, m8, x128, x128);
+ x128 = _mm_maskz_sub_epi64 (m8, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsubq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubq-2.c
new file mode 100644
index 00000000000..37e4d82a217
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsubq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpsubq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsubsb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubsb-2.c
new file mode 100644
index 00000000000..e426dbff940
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubsb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubsb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubsb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsubsw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubsw-2.c
new file mode 100644
index 00000000000..6f573124ce5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubsw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubsw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubsw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsubusb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubusb-2.c
new file mode 100644
index 00000000000..f92d757b368
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubusb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubusb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubusb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsubusw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubusw-2.c
new file mode 100644
index 00000000000..4553ea92a35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubusw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubusw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubusw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpsubw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubw-2.c
new file mode 100644
index 00000000000..b229c087afa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpsubw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpsubw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-1.c
new file mode 100644
index 00000000000..c280c359db7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y, y2, y3;
+volatile __m128i x, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_ternarylogic_epi32 (y, y2, y3, 0xF0);
+ y = _mm256_mask_ternarylogic_epi32 (y, m, y2, y3, 0xF0);
+ y = _mm256_maskz_ternarylogic_epi32 (m, y, y2, y3, 0xF0);
+
+ x = _mm_ternarylogic_epi32 (x, x2, x3, 0xF0);
+ x = _mm_mask_ternarylogic_epi32 (x, m, x2, x3, 0xF0);
+ x = _mm_maskz_ternarylogic_epi32 (m, x, x2, x3, 0xF0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-2.c
new file mode 100644
index 00000000000..0336a153e2c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpternlogd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpternlogd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-1.c
new file mode 100644
index 00000000000..ae06ee9df65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y, y2, y3;
+volatile __m128i x, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_ternarylogic_epi64 (y, y2, y3, 0xF0);
+ y = _mm256_mask_ternarylogic_epi64 (y, m, y2, y3, 0xF0);
+ y = _mm256_maskz_ternarylogic_epi64 (m, y, y2, y3, 0xF0);
+
+ x = _mm_ternarylogic_epi64 (x, x2, x3, 0xF0);
+ x = _mm_mask_ternarylogic_epi64 (x, m, x2, x3, 0xF0);
+ x = _mm_maskz_ternarylogic_epi64 (m, x, x2, x3, 0xF0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-2.c
new file mode 100644
index 00000000000..9187e0adea3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpternlogq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpternlogq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpternlogq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestmb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmb-2.c
new file mode 100644
index 00000000000..595b9c91594
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vptestmb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vptestmb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestmd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmd-1.c
new file mode 100644
index 00000000000..f9ea72b1ccd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vptestmd\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestmd\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestmd\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestmd\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm_test_epi32_mask (x128, x128);
+ m = _mm256_test_epi32_mask (x256, x256);
+ m = _mm_mask_test_epi32_mask (3, x128, x128);
+ m = _mm256_mask_test_epi32_mask (3, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestmd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmd-2.c
new file mode 100644
index 00000000000..d0fed9722b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vptestmd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vptestmd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestmq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmq-1.c
new file mode 100644
index 00000000000..d463e84ccba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vptestmq\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestmq\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestmq\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestmq\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm_test_epi64_mask (x128, x128);
+ m = _mm256_test_epi64_mask (x256, x256);
+ m = _mm_mask_test_epi64_mask (3, x128, x128);
+ m = _mm256_mask_test_epi64_mask (3, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestmq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmq-2.c
new file mode 100644
index 00000000000..0cc41241566
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vptestmq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vptestmq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestmw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmw-2.c
new file mode 100644
index 00000000000..d1598c0b699
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestmw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vptestmw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vptestmw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmb-2.c
new file mode 100644
index 00000000000..2df27f29908
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmb-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vptestnmb-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vptestnmb-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmd-1.c
new file mode 100644
index 00000000000..37086c44058
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vptestnmd\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestnmd\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestnmd\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestnmd\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm_testn_epi32_mask (x128, x128);
+ m = _mm256_testn_epi32_mask (x256, x256);
+ m = _mm_mask_testn_epi32_mask (3, x128, x128);
+ m = _mm256_mask_testn_epi32_mask (3, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmd-2.c
new file mode 100644
index 00000000000..307e618036a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vptestnmd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vptestnmd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmq-1.c
new file mode 100644
index 00000000000..91fb87b68eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler "vptestnmq\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestnmq\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\[^\{\]" } } */
+/* { dg-final { scan-assembler "vptestnmq\[ \\t\]+\[^\n\]*%xmm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+/* { dg-final { scan-assembler "vptestnmq\[ \\t\]+\[^\n\]*%ymm\[0-7\]\[^\n\]*k\[1-7\]\{%k\[1-7\]\}" } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ m = _mm_testn_epi64_mask (x128, x128);
+ m = _mm256_testn_epi64_mask (x256, x256);
+ m = _mm_mask_testn_epi64_mask (3, x128, x128);
+ m = _mm256_mask_testn_epi64_mask (3, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmq-2.c
new file mode 100644
index 00000000000..89aa0ff4a15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vptestnmq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vptestnmq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmw-2.c
new file mode 100644
index 00000000000..bbc8c75e6e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vptestnmw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vptestnmw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vptestnmw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhbw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhbw-2.c
new file mode 100644
index 00000000000..fadf7ba761a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhbw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpunpckhbw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpunpckhbw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhdq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhdq-1.c
new file mode 100644
index 00000000000..06f7db7f23c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhdq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpunpckhdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x, y, z;
+volatile __m128i a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_unpackhi_epi32 (x, m, y, z);
+ x = _mm256_maskz_unpackhi_epi32 (m, y, z);
+ a = _mm_mask_unpackhi_epi32 (a, m, b, c);
+ a = _mm_maskz_unpackhi_epi32 (m, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhdq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhdq-2.c
new file mode 100644
index 00000000000..6d4743caef4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhdq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpunpckhdq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpunpckhdq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhqdq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhqdq-1.c
new file mode 100644
index 00000000000..0948430d6ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhqdq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpunpckhqdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhqdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhqdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckhqdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x, y, z;
+volatile __m128i a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_unpackhi_epi64 (x, m, y, z);
+ x = _mm256_maskz_unpackhi_epi64 (m, y, z);
+ a = _mm_mask_unpackhi_epi64 (a, m, b, c);
+ a = _mm_maskz_unpackhi_epi64 (m, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhqdq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhqdq-2.c
new file mode 100644
index 00000000000..f61e456dd27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhqdq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpunpckhqdq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpunpckhqdq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhwd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhwd-2.c
new file mode 100644
index 00000000000..41ad9b7df17
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckhwd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpunpckhwd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpunpckhwd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklbw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklbw-2.c
new file mode 100644
index 00000000000..d275f18feae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklbw-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpunpcklbw-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpunpcklbw-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckldq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckldq-1.c
new file mode 100644
index 00000000000..19e10c1e898
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckldq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpunpckldq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckldq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckldq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpckldq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x, y, z;
+volatile __m128i a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_unpacklo_epi32 (x, m, y, z);
+ x = _mm256_maskz_unpacklo_epi32 (m, y, z);
+ a = _mm_mask_unpacklo_epi32 (a, m, b, c);
+ a = _mm_maskz_unpacklo_epi32 (m, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckldq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckldq-2.c
new file mode 100644
index 00000000000..6d3099ec862
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpckldq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpunpckldq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpunpckldq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklqdq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklqdq-1.c
new file mode 100644
index 00000000000..b3add1e8026
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklqdq-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpunpcklqdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklqdq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklqdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpunpcklqdq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x, y, z;
+volatile __m128i a, b, c;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_unpacklo_epi64 (x, m, y, z);
+ x = _mm256_maskz_unpacklo_epi64 (m, y, z);
+ a = _mm_mask_unpacklo_epi64 (a, m, b, c);
+ a = _mm_maskz_unpacklo_epi64 (m, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklqdq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklqdq-2.c
new file mode 100644
index 00000000000..37ec820b557
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklqdq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpunpcklqdq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpunpcklqdq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklwd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklwd-2.c
new file mode 100644
index 00000000000..4047202ad90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpunpcklwd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpunpcklwd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-vpunpcklwd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpxord-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpxord-1.c
new file mode 100644
index 00000000000..aa65a8b7b05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpxord-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpxord\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpxord\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpxord\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpxord\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpxord\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpxord\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_xor_epi32 (y, m, y, y);
+ y = _mm256_maskz_xor_epi32 (m, y, y);
+
+ x = _mm_mask_xor_epi32 (x, m, x, x);
+ x = _mm_maskz_xor_epi32 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpxord-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpxord-2.c
new file mode 100644
index 00000000000..f67fdf9377d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpxord-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpxord-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpxord-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpxorq-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpxorq-1.c
new file mode 100644
index 00000000000..a7df4fde482
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpxorq-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpxorq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpxorq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpxorq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vpxorq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
+/* { dg-final { scan-assembler-times "vpxorq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vpxorq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i y;
+volatile __m128i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ y = _mm256_mask_xor_epi64 (y, m, y, y);
+ y = _mm256_maskz_xor_epi64 (m, y, y);
+
+ x = _mm_mask_xor_epi64 (x, m, x, x);
+ x = _mm_maskz_xor_epi64 (m, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpxorq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpxorq-2.c
new file mode 100644
index 00000000000..f28e67122cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpxorq-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpxorq-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vpxorq-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrangepd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrangepd-2.c
new file mode 100644
index 00000000000..2353bbde069
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrangepd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vrangepd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vrangepd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrangeps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrangeps-2.c
new file mode 100644
index 00000000000..b3ee2343ec6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrangeps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vrangeps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vrangeps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14pd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14pd-1.c
new file mode 100644
index 00000000000..dff3b0255ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14pd-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vrcp14pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrcp14pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrcp14pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x1;
+volatile __m128d x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_rcp14_pd (x1);
+ x2 = _mm_rcp14_pd (x2);
+
+ x1 = _mm256_mask_rcp14_pd (x1, m, x1);
+ x2 = _mm_mask_rcp14_pd (x2, m, x2);
+
+ x1 = _mm256_maskz_rcp14_pd (m, x1);
+ x2 = _mm_maskz_rcp14_pd (m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14pd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14pd-2.c
new file mode 100644
index 00000000000..737c81db026
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14pd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrcp14pd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrcp14pd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14ps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14ps-1.c
new file mode 100644
index 00000000000..3ca71f4e3ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14ps-1.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vrcp14ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrcp14ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrcp14ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vrcp14ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_rcp14_ps (x1);
+ x2 = _mm_rcp14_ps (x2);
+
+ x1 = _mm256_mask_rcp14_ps (x1, m, x1);
+ x2 = _mm_mask_rcp14_ps (x2, m, x2);
+
+ x1 = _mm256_maskz_rcp14_ps (m, x1);
+ x2 = _mm_maskz_rcp14_ps (m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14ps-2.c
new file mode 100644
index 00000000000..8241aa2649e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrcp14ps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrcp14ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrcp14ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vreducepd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vreducepd-2.c
new file mode 100644
index 00000000000..9083ccc96f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vreducepd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vreducepd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vreducepd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vreduceps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vreduceps-2.c
new file mode 100644
index 00000000000..6c571fb5df7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vreduceps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vreduceps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vreduceps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrndscalepd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrndscalepd-1.c
new file mode 100644
index 00000000000..7971ce99797
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrndscalepd-1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 9 } } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 3 } } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 9 } } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 3 } } */
+/* { dg-final { scan-assembler-times "vrndscalepd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 3 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x1;
+volatile __m128d x2;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_roundscale_pd (x1, 0x42);
+ x1 = _mm256_ceil_pd (x1);
+ x1 = _mm256_floor_pd (x1);
+ x1 = _mm256_mask_roundscale_pd (x1, 2, x1, 0x42);
+ x1 = _mm256_mask_ceil_pd (x1, 2, x1);
+ x1 = _mm256_mask_floor_pd (x1, 2, x1);
+ x1 = _mm256_maskz_roundscale_pd (2, x1, 0x42);
+ x1 = _mm256_maskz_ceil_pd (2, x1);
+ x1 = _mm256_maskz_floor_pd (2, x1);
+ x2 = _mm_roundscale_pd (x2, 0x42);
+ x2 = _mm_ceil_pd (x2);
+ x2 = _mm_floor_pd (x2);
+ x2 = _mm_mask_roundscale_pd (x2, 2, x2, 0x42);
+ x2 = _mm_mask_ceil_pd (x2, 2, x2);
+ x2 = _mm_mask_floor_pd (x2, 2, x2);
+ x2 = _mm_maskz_roundscale_pd (2, x2, 0x42);
+ x2 = _mm_maskz_ceil_pd (2, x2);
+ x2 = _mm_maskz_floor_pd (2, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrndscalepd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrndscalepd-2.c
new file mode 100644
index 00000000000..77d56318743
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrndscalepd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrndscalepd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrndscalepd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrndscaleps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrndscaleps-1.c
new file mode 100644
index 00000000000..ee054724dc4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrndscaleps-1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 9 } } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 3 } } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 9 } } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrndscaleps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 3 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x1;
+volatile __m128 x2;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_roundscale_ps (x1, 0x42);
+ x1 = _mm256_ceil_ps (x1);
+ x1 = _mm256_floor_ps (x1);
+ x1 = _mm256_mask_roundscale_ps (x1, 2, x1, 0x42);
+ x1 = _mm256_mask_ceil_ps (x1, 2, x1);
+ x1 = _mm256_mask_floor_ps (x1, 2, x1);
+ x1 = _mm256_maskz_roundscale_ps (2, x1, 0x42);
+ x1 = _mm256_maskz_ceil_ps (2, x1);
+ x1 = _mm256_maskz_floor_ps (2, x1);
+ x2 = _mm_roundscale_ps (x2, 0x42);
+ x2 = _mm_ceil_ps (x2);
+ x2 = _mm_floor_ps (x2);
+ x2 = _mm_mask_roundscale_ps (x2, 2, x2, 0x42);
+ x2 = _mm_mask_ceil_ps (x2, 2, x2);
+ x2 = _mm_mask_floor_ps (x2, 2, x2);
+ x2 = _mm_maskz_roundscale_ps (2, x2, 0x42);
+ x2 = _mm_maskz_ceil_ps (2, x2);
+ x2 = _mm_maskz_floor_ps (2, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrndscaleps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrndscaleps-2.c
new file mode 100644
index 00000000000..7f7566bebd4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrndscaleps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrndscaleps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrndscaleps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14pd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14pd-1.c
new file mode 100644
index 00000000000..3400276379a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14pd-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vrsqrt14pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14pd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14pd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x1;
+volatile __m128d x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_rsqrt14_pd (x1);
+ x1 = _mm256_mask_rsqrt14_pd (x1, m, x1);
+ x1 = _mm256_maskz_rsqrt14_pd (m, x1);
+
+ x2 = _mm_rsqrt14_pd (x2);
+ x2 = _mm_mask_rsqrt14_pd (x2, m, x2);
+ x2 = _mm_maskz_rsqrt14_pd (m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14pd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14pd-2.c
new file mode 100644
index 00000000000..750e5916d9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14pd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrsqrt14pd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrsqrt14pd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14ps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14ps-1.c
new file mode 100644
index 00000000000..840e60b5b2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14ps-1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vrsqrt14ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_rsqrt14_ps (x1);
+ x1 = _mm256_mask_rsqrt14_ps (x1, m, x1);
+ x1 = _mm256_maskz_rsqrt14_ps (m, x1);
+
+ x2 = _mm_rsqrt14_ps (x2);
+ x2 = _mm_mask_rsqrt14_ps (x2, m, x2);
+ x2 = _mm_maskz_rsqrt14_ps (m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14ps-2.c
new file mode 100644
index 00000000000..6eed5988b13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vrsqrt14ps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrsqrt14ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vrsqrt14ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vscalefpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vscalefpd-1.c
new file mode 100644
index 00000000000..63e571d23ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vscalefpd-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x256;
+volatile __m128d x128;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_scalef_pd (x256, x256);
+ x256 = _mm256_mask_scalef_pd (x256, m8, x256, x256);
+ x256 = _mm256_maskz_scalef_pd (m8, x256, x256);
+ x128 = _mm_scalef_pd (x128, x128);
+ x128 = _mm_mask_scalef_pd (x128, m8, x128, x128);
+ x128 = _mm_maskz_scalef_pd (m8, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vscalefpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vscalefpd-2.c
new file mode 100644
index 00000000000..0d8e4c4255e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vscalefpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vscalefpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vscalefpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vscalefps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vscalefps-1.c
new file mode 100644
index 00000000000..b12359c33e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vscalefps-1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 3 } } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vscalefps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x256;
+volatile __m128 x128;
+volatile __mmask8 m8;
+
+void extern
+avx512vl_test (void)
+{
+ x256 = _mm256_scalef_ps (x256, x256);
+ x256 = _mm256_mask_scalef_ps (x256, m8, x256, x256);
+ x256 = _mm256_maskz_scalef_ps (m8, x256, x256);
+ x128 = _mm_scalef_ps (x128, x128);
+ x128 = _mm_mask_scalef_ps (x128, m8, x128, x128);
+ x128 = _mm_maskz_scalef_ps (m8, x128, x128);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vscalefps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vscalefps-2.c
new file mode 100644
index 00000000000..d655a1b6dcd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vscalefps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vscalefps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vscalefps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-1.c
new file mode 100644
index 00000000000..d8a9f56e415
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vshuff32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vshuff32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vshuff32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_shuffle_f32x4 (x, x, 2);
+ x = _mm256_mask_shuffle_f32x4 (x, m, x, x, 2);
+ x = _mm256_maskz_shuffle_f32x4 (m, x, x, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-2.c
new file mode 100644
index 00000000000..7fbbff3c9c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vshuff32x4-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-1.c
new file mode 100644
index 00000000000..83b1d148b1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vshuff64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vshuff64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vshuff64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_shuffle_f64x2 (x, x, 2);
+ x = _mm256_mask_shuffle_f64x2 (x, m, x, x, 2);
+ x = _mm256_maskz_shuffle_f64x2 (m, x, x, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-2.c
new file mode 100644
index 00000000000..e751077b74b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vshuff64x2-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-1.c
new file mode 100644
index 00000000000..0d52a339275
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vshufi32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vshufi32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vshufi32x4\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_shuffle_i32x4 (x, x, 2);
+ x = _mm256_mask_shuffle_i32x4 (x, m, x, x, 2);
+ x = _mm256_maskz_shuffle_i32x4 (m, x, x, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-2.c
new file mode 100644
index 00000000000..8debef41427
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vshufi32x4-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-1.c
new file mode 100644
index 00000000000..bde1bf08e8b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vshufi64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vshufi64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vshufi64x2\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i x;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_shuffle_i64x2 (x, x, 2);
+ x = _mm256_mask_shuffle_i64x2 (x, m, x, x, 2);
+ x = _mm256_maskz_shuffle_i64x2 (m, x, x, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-2.c
new file mode 100644
index 00000000000..726234c3d1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-2.c
@@ -0,0 +1,10 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vshufi64x2-2.c"
+
+void
+test_128 () {}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshufpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshufpd-1.c
new file mode 100644
index 00000000000..addb7cd9837
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshufpd-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vshufpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vshufpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vshufpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vshufpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x;
+volatile __m128d y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_shuffle_pd (x, m, x, x, 13);
+ x = _mm256_maskz_shuffle_pd (m, x, x, 13);
+ y = _mm_mask_shuffle_pd (y, m, y, y, 1);
+ y = _mm_maskz_shuffle_pd (m, y, y, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshufpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshufpd-2.c
new file mode 100644
index 00000000000..8b4ef9882a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshufpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vshufpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vshufpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshufps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshufps-1.c
new file mode 100644
index 00000000000..560195cbea0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshufps-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vshufps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vshufps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vshufps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vshufps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x;
+volatile __m128 y;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_mask_shuffle_ps (x, m, x, x, 64);
+ x = _mm256_maskz_shuffle_ps (m, x, x, 64);
+ y = _mm_mask_shuffle_ps (y, m, y, y, 64);
+ y = _mm_maskz_shuffle_ps (m, y, y, 64);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshufps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshufps-2.c
new file mode 100644
index 00000000000..260922f27fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshufps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vshufps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vshufps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtpd-1.c
new file mode 100644
index 00000000000..63dbb83d63b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtpd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vsqrtpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d x1;
+volatile __m128d x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_sqrt_pd (x1, m, x1);
+ x1 = _mm256_maskz_sqrt_pd (m, x1);
+
+ x2 = _mm_mask_sqrt_pd (x2, m, x2);
+ x2 = _mm_maskz_sqrt_pd (m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtpd-2.c
new file mode 100644
index 00000000000..15698e2a344
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vsqrtpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vsqrtpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtps-1.c
new file mode 100644
index 00000000000..9bfd599232b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtps-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vsqrtps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vsqrtps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x1;
+volatile __m128 x2;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x1 = _mm256_mask_sqrt_ps (x1, m, x1);
+ x1 = _mm256_maskz_sqrt_ps (m, x1);
+
+ x2 = _mm_mask_sqrt_ps (x2, m, x2);
+ x2 = _mm_maskz_sqrt_ps (m, x2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtps-2.c
new file mode 100644
index 00000000000..1bd5bb12e18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vsqrtps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vsqrtps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vsqrtps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vsubpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vsubpd-1.c
new file mode 100644
index 00000000000..9234f0fe33e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vsubpd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vsubpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vsubpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128d x128;
+volatile __m256d x256;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x128 = _mm_mask_sub_pd (x128, m, x128, x128);
+ x128 = _mm_maskz_sub_pd (m, x128, x128);
+
+ x256 = _mm256_mask_sub_pd (x256, m, x256, x256);
+ x256 = _mm256_maskz_sub_pd (m, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vsubpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vsubpd-2.c
new file mode 100644
index 00000000000..b3a9c8f76dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vsubpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vsubpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vsubpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vsubps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vsubps-1.c
new file mode 100644
index 00000000000..a8909163bdb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vsubps-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vsubps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vsubps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vsubps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x128;
+volatile __m256 x256;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ x128 = _mm_mask_sub_ps (x128, m, x128, x128);
+ x128 = _mm_maskz_sub_ps (m, x128, x128);
+
+ x256 = _mm256_mask_sub_ps (x256, m, x256, x256);
+ x256 = _mm256_maskz_sub_ps (m, x256, x256);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vsubps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vsubps-2.c
new file mode 100644
index 00000000000..d22740a098d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vsubps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vsubps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vsubps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhpd-1.c
new file mode 100644
index 00000000000..3c292804d2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhpd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vunpckhpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpckhpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpckhpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vunpckhpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d yy, y2, y3;
+volatile __m128d xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_unpackhi_pd (yy, m, y2, y3);
+ xx = _mm_mask_unpackhi_pd (xx, m, x2, x3);
+
+ yy = _mm256_maskz_unpackhi_pd (m, y2, y3);
+ xx = _mm_maskz_unpackhi_pd (m, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhpd-2.c
new file mode 100644
index 00000000000..a3323db368e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vunpckhpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vunpckhpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhps-1.c
new file mode 100644
index 00000000000..1a8aa61ccff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhps-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vunpckhps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpckhps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpckhps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vunpckhps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 yy, y2, y3;
+volatile __m128 xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_unpackhi_ps (yy, m, y2, y3);
+ xx = _mm_mask_unpackhi_ps (xx, m, x2, x3);
+
+ yy = _mm256_maskz_unpackhi_ps (m, y2, y3);
+ xx = _mm_maskz_unpackhi_ps (m, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhps-2.c
new file mode 100644
index 00000000000..211cbd90a52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vunpckhps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vunpckhps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vunpckhps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklpd-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklpd-1.c
new file mode 100644
index 00000000000..d2b6b867b4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklpd-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vunpcklpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpcklpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpcklpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vunpcklpd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256d yy, y2, y3;
+volatile __m128d xx, x2, x3;
+volatile __mmask8 m;
+
+void extern
+avx512vl_test (void)
+{
+ yy = _mm256_mask_unpacklo_pd (yy, m, y2, y3);
+ xx = _mm_mask_unpacklo_pd (xx, m, x2, x3);
+
+ yy = _mm256_maskz_unpacklo_pd (m, y2, y3);
+ xx = _mm_maskz_unpacklo_pd (m, x2, x3);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklpd-2.c
new file mode 100644
index 00000000000..03df654b3ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vunpcklpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vunpcklpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklps-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklps-1.c
new file mode 100644
index 00000000000..bf49da0d7ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklps-1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vunpcklps\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vunpcklps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpcklps\[ \\t\]+\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+/* { dg-final { scan-assembler-times "vunpcklps\[ \\t\]+\[^\n\]*%xmm\[0-9\]" 3 } } */
+/* { dg-final { scan-assembler-times "vunpcklps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */
+/* { dg-final { scan-assembler-times "vunpcklps\[ \\t\]+\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256 x, y, z;
+volatile __m128 xx, yy, zz;
+
+void extern
+avx512vl_test (void)
+{
+ x = _mm256_unpacklo_ps (y, z);
+ x = _mm256_mask_unpacklo_ps (x, 2, y, z);
+ x = _mm256_maskz_unpacklo_ps (2, y, z);
+ xx = _mm_unpacklo_ps (yy, zz);
+ xx = _mm_mask_unpacklo_ps (xx, 2, yy, zz);
+ xx = _mm_maskz_unpacklo_ps (2, yy, zz);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklps-2.c
new file mode 100644
index 00000000000..2fa930673bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vunpcklps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vunpcklps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-vunpcklps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vxorpd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vxorpd-2.c
new file mode 100644
index 00000000000..1e88a3a609e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vxorpd-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vxorpd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vxorpd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vxorps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vxorps-2.c
new file mode 100644
index 00000000000..f6c3ed5bd1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vxorps-2.c
@@ -0,0 +1,14 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq -mavx512vl -DAVX512VL" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vxorps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512dq-vxorps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp
index c753a2b6658..e8da8e5c1a0 100644
--- a/gcc/testsuite/gcc.target/i386/i386.exp
+++ b/gcc/testsuite/gcc.target/i386/i386.exp
@@ -266,6 +266,19 @@ proc check_effective_target_avx512f { } {
} "-mavx512f" ]
}
+# Return 1 if avx512vl instructions can be compiled.
+proc check_effective_target_avx512vl { } {
+ return [check_no_compiler_messages avx512vl object {
+ typedef long long __v4di __attribute__ ((__vector_size__ (32)));
+ __v4di
+ mm256_and_epi64 (__v4di __X, __v4di __Y)
+ {
+ __v4di __W;
+ return __builtin_ia32_pandq256_mask (__X, __Y, __W, -1);
+ }
+ } "-mavx512vl" ]
+}
+
# Return 1 if avx512cd instructions can be compiled.
proc check_effective_target_avx512cd { } {
return [check_no_compiler_messages avx512cd_trans object {
@@ -274,8 +287,8 @@ proc check_effective_target_avx512cd { } {
_mm512_conflict_epi64 (__v8di __W, __v8di __A)
{
return (__v8di) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
- (__v8di) __W,
- -1);
+ (__v8di) __W,
+ -1);
}
} "-Wno-psabi -mavx512cd" ]
}
@@ -306,6 +319,36 @@ proc check_effective_target_sha { } {
} "-O2 -msha" ]
}
+# Return 1 if avx512dq instructions can be compiled.
+proc check_effective_target_avx512dq { } {
+ return [check_no_compiler_messages avx512dq object {
+ typedef long long __v8di __attribute__ ((__vector_size__ (64)));
+ __v8di
+ _mm512_mask_mullo_epi64 (__v8di __W, __v8di __A, __v8di __B)
+ {
+ return (__v8di) __builtin_ia32_pmullq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ -1);
+ }
+ } "-mavx512dq" ]
+}
+
+# Return 1 if avx512bw instructions can be compiled.
+proc check_effective_target_avx512bw { } {
+ return [check_no_compiler_messages avx512bw object {
+ typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+ __v32hi
+ _mm512_mask_mulhrs_epi16 (__v32hi __W, __v32hi __A, __v32hi __B)
+ {
+ return (__v32hi) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ -1);
+ }
+ } "-mavx512bw" ]
+}
+
# If a testcase doesn't have special options, use these.
global DEFAULT_CFLAGS
if ![info exists DEFAULT_CFLAGS] then {
diff --git a/gcc/testsuite/gcc.target/i386/m512-check.h b/gcc/testsuite/gcc.target/i386/m512-check.h
index 64e085bc602..1a1065d2053 100644
--- a/gcc/testsuite/gcc.target/i386/m512-check.h
+++ b/gcc/testsuite/gcc.target/i386/m512-check.h
@@ -111,3 +111,7 @@ check_rough_##UINON_TYPE (UINON_TYPE u, const VALUE_TYPE *v, \
CHECK_ROUGH_EXP (union512, float, "%f")
CHECK_ROUGH_EXP (union512d, double, "%f")
+CHECK_ROUGH_EXP (union256, float, "%f")
+CHECK_ROUGH_EXP (union256d, double, "%f")
+CHECK_ROUGH_EXP (union128, float, "%f")
+CHECK_ROUGH_EXP (union128d, double, "%f")
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index 88a7613e043..67bcf48fbc0 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@
popcntintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl" } */
#include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index de54a45a2e0..b17e8ebb5e0 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw" } */
#include <mm_malloc.h>
@@ -382,3 +382,207 @@
/* shaintrin.h */
#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
+
+/* TODO split later */
+#define __builtin_ia32_pslldq512(A, B) __builtin_ia32_pslldq512(A, 8)
+#define __builtin_ia32_psrldq512(A, B) __builtin_ia32_psrldq512(A, 8)
+#define __builtin_ia32_alignd128_mask(A, B, F, D, E) __builtin_ia32_alignd128_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignd256_mask(A, B, F, D, E) __builtin_ia32_alignd256_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignq128_mask(A, B, F, D, E) __builtin_ia32_alignq128_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignq256_mask(A, B, F, D, E) __builtin_ia32_alignq256_mask(A, B, 1, D, E)
+#define __builtin_ia32_cmpb128_mask(A, B, E, D) __builtin_ia32_cmpb128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpb256_mask(A, B, E, D) __builtin_ia32_cmpb256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpb512_mask(A, B, E, D) __builtin_ia32_cmpb512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpd128_mask(A, B, E, D) __builtin_ia32_cmpd128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpd256_mask(A, B, E, D) __builtin_ia32_cmpd256_mask(A, B, 1, D)
+#define __builtin_ia32_cmppd128_mask(A, B, E, D) __builtin_ia32_cmppd128_mask(A, B, 1, D)
+#define __builtin_ia32_cmppd256_mask(A, B, E, D) __builtin_ia32_cmppd256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpps128_mask(A, B, E, D) __builtin_ia32_cmpps128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpps256_mask(A, B, E, D) __builtin_ia32_cmpps256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpq128_mask(A, B, E, D) __builtin_ia32_cmpq128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpq256_mask(A, B, E, D) __builtin_ia32_cmpq256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpw128_mask(A, B, E, D) __builtin_ia32_cmpw128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpw256_mask(A, B, E, D) __builtin_ia32_cmpw256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpw512_mask(A, B, E, D) __builtin_ia32_cmpw512_mask(A, B, 1, D)
+#define __builtin_ia32_cvtpd2qq512_mask(A, B, C, D) __builtin_ia32_cvtpd2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtpd2uqq512_mask(A, B, C, D) __builtin_ia32_cvtpd2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtps2qq512_mask(A, B, C, D) __builtin_ia32_cvtps2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtps2uqq512_mask(A, B, C, D) __builtin_ia32_cvtps2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtqq2pd512_mask(A, B, C, D) __builtin_ia32_cvtqq2pd512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtqq2ps512_mask(A, B, C, D) __builtin_ia32_cvtqq2ps512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qq512_mask(A, B, C, D) __builtin_ia32_cvttpd2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqq512_mask(A, B, C, D) __builtin_ia32_cvttpd2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttps2qq512_mask(A, B, C, D) __builtin_ia32_cvttps2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqq512_mask(A, B, C, D) __builtin_ia32_cvttps2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtuqq2pd512_mask(A, B, C, D) __builtin_ia32_cvtuqq2pd512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtuqq2ps512_mask(A, B, C, D) __builtin_ia32_cvtuqq2ps512_mask(A, B, C, 8)
+#define __builtin_ia32_dbpsadbw128_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw128_mask(A, B, 1, D, E)
+#define __builtin_ia32_dbpsadbw256_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw256_mask(A, B, 1, D, E)
+#define __builtin_ia32_dbpsadbw512_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw512_mask(A, B, 1, D, E)
+#define __builtin_ia32_extractf32x4_256_mask(A, E, C, D) __builtin_ia32_extractf32x4_256_mask(A, 1, C, D)
+#define __builtin_ia32_extractf32x8_mask(A, E, C, D) __builtin_ia32_extractf32x8_mask(A, 1, C, D)
+#define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D)
+#define __builtin_ia32_extractf64x2_512_mask(A, E, C, D) __builtin_ia32_extractf64x2_512_mask(A, 1, C, D)
+#define __builtin_ia32_extracti32x4_256_mask(A, E, C, D) __builtin_ia32_extracti32x4_256_mask(A, 1, C, D)
+#define __builtin_ia32_extracti32x8_mask(A, E, C, D) __builtin_ia32_extracti32x8_mask(A, 1, C, D)
+#define __builtin_ia32_extracti64x2_256_mask(A, E, C, D) __builtin_ia32_extracti64x2_256_mask(A, 1, C, D)
+#define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) __builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
+#define __builtin_ia32_fixupimmpd128(A, B, C, E) __builtin_ia32_fixupimmpd128(A, B, C, 1)
+#define __builtin_ia32_fixupimmpd128_mask(A, B, C, F, E) __builtin_ia32_fixupimmpd128_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmpd128_maskz(A, B, C, F, E) __builtin_ia32_fixupimmpd128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmpd256(A, B, C, E) __builtin_ia32_fixupimmpd256(A, B, C, 1)
+#define __builtin_ia32_fixupimmpd256_mask(A, B, C, F, E) __builtin_ia32_fixupimmpd256_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmpd256_maskz(A, B, C, F, E) __builtin_ia32_fixupimmpd256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps128(A, B, C, E) __builtin_ia32_fixupimmps128(A, B, C, 1)
+#define __builtin_ia32_fixupimmps128_mask(A, B, C, F, E) __builtin_ia32_fixupimmps128_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps128_maskz(A, B, C, F, E) __builtin_ia32_fixupimmps128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps256(A, B, C, E) __builtin_ia32_fixupimmps256(A, B, C, 1)
+#define __builtin_ia32_fixupimmps256_mask(A, B, C, F, E) __builtin_ia32_fixupimmps256_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps256_maskz(A, B, C, F, E) __builtin_ia32_fixupimmps256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fpclasspd128_mask(A, D, C) __builtin_ia32_fpclasspd128_mask(A, 1, C)
+#define __builtin_ia32_fpclasspd256_mask(A, D, C) __builtin_ia32_fpclasspd256_mask(A, 1, C)
+#define __builtin_ia32_fpclasspd512_mask(A, D, C) __builtin_ia32_fpclasspd512_mask(A, 1, C)
+#define __builtin_ia32_fpclassps128_mask(A, D, C) __builtin_ia32_fpclassps128_mask(A, 1, C)
+#define __builtin_ia32_fpclassps256_mask(A, D, C) __builtin_ia32_fpclassps256_mask(A, 1, C)
+#define __builtin_ia32_fpclassps512_mask(A, D, C) __builtin_ia32_fpclassps512_mask(A, 1, C)
+#define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
+#define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
+#define __builtin_ia32_gather3div2df(A, B, C, D, F) __builtin_ia32_gather3div2df(A, B, C, D, 1)
+#define __builtin_ia32_gather3div2di(A, B, C, D, F) __builtin_ia32_gather3div2di(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4df(A, B, C, D, F) __builtin_ia32_gather3div4df(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4di(A, B, C, D, F) __builtin_ia32_gather3div4di(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4sf(A, B, C, D, F) __builtin_ia32_gather3div4sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4si(A, B, C, D, F) __builtin_ia32_gather3div4si(A, B, C, D, 1)
+#define __builtin_ia32_gather3div8sf(A, B, C, D, F) __builtin_ia32_gather3div8sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3div8si(A, B, C, D, F) __builtin_ia32_gather3div8si(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv2df(A, B, C, D, F) __builtin_ia32_gather3siv2df(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv2di(A, B, C, D, F) __builtin_ia32_gather3siv2di(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4df(A, B, C, D, F) __builtin_ia32_gather3siv4df(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4di(A, B, C, D, F) __builtin_ia32_gather3siv4di(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4sf(A, B, C, D, F) __builtin_ia32_gather3siv4sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4si(A, B, C, D, F) __builtin_ia32_gather3siv4si(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv8sf(A, B, C, D, F) __builtin_ia32_gather3siv8sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv8si(A, B, C, D, F) __builtin_ia32_gather3siv8si(A, B, C, D, 1)
+#define __builtin_ia32_getmantpd128_mask(A, E, C, D) __builtin_ia32_getmantpd128_mask(A, 1, C, D)
+#define __builtin_ia32_getmantpd256_mask(A, E, C, D) __builtin_ia32_getmantpd256_mask(A, 1, C, D)
+#define __builtin_ia32_getmantps128_mask(A, E, C, D) __builtin_ia32_getmantps128_mask(A, 1, C, D)
+#define __builtin_ia32_getmantps256_mask(A, E, C, D) __builtin_ia32_getmantps256_mask(A, 1, C, D)
+#define __builtin_ia32_insertf32x4_256_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) __builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf64x2_256_mask(A, B, F, D, E) __builtin_ia32_insertf64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf64x2_512_mask(A, B, F, D, E) __builtin_ia32_insertf64x2_512_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti32x4_256_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti32x8_mask(A, B, F, D, E) __builtin_ia32_inserti32x8_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti64x2_256_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti64x2_512_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_512_mask(A, B, 1, D, E)
+#define __builtin_ia32_palignr128_mask(A, B, F, D, E) __builtin_ia32_palignr128_mask(A, B, 8, D, E)
+#define __builtin_ia32_palignr256_mask(A, B, F, D, E) __builtin_ia32_palignr256_mask(A, B, 8, D, E)
+#define __builtin_ia32_palignr512(A, B, D) __builtin_ia32_palignr512(A, B, 8)
+#define __builtin_ia32_palignr512_mask(A, B, F, D, E) __builtin_ia32_palignr512_mask(A, B, 8, D, E)
+#define __builtin_ia32_permdf256_mask(A, E, C, D) __builtin_ia32_permdf256_mask(A, 1, C, D)
+#define __builtin_ia32_permdi256_mask(A, E, C, D) __builtin_ia32_permdi256_mask(A, 1, C, D)
+#define __builtin_ia32_prold128_mask(A, E, C, D) __builtin_ia32_prold128_mask(A, 1, C, D)
+#define __builtin_ia32_prold256_mask(A, E, C, D) __builtin_ia32_prold256_mask(A, 1, C, D)
+#define __builtin_ia32_prolq128_mask(A, E, C, D) __builtin_ia32_prolq128_mask(A, 1, C, D)
+#define __builtin_ia32_prolq256_mask(A, E, C, D) __builtin_ia32_prolq256_mask(A, 1, C, D)
+#define __builtin_ia32_prord128_mask(A, E, C, D) __builtin_ia32_prord128_mask(A, 1, C, D)
+#define __builtin_ia32_prord256_mask(A, E, C, D) __builtin_ia32_prord256_mask(A, 1, C, D)
+#define __builtin_ia32_prorq128_mask(A, E, C, D) __builtin_ia32_prorq128_mask(A, 1, C, D)
+#define __builtin_ia32_prorq256_mask(A, E, C, D) __builtin_ia32_prorq256_mask(A, 1, C, D)
+#define __builtin_ia32_pshufd128_mask(A, E, C, D) __builtin_ia32_pshufd128_mask(A, 1, C, D)
+#define __builtin_ia32_pshufd256_mask(A, E, C, D) __builtin_ia32_pshufd256_mask(A, 1, C, D)
+#define __builtin_ia32_pshufhw128_mask(A, E, C, D) __builtin_ia32_pshufhw128_mask(A, 1, C, D)
+#define __builtin_ia32_pshufhw256_mask(A, E, C, D) __builtin_ia32_pshufhw256_mask(A, 1, C, D)
+#define __builtin_ia32_pshufhw512_mask(A, E, C, D) __builtin_ia32_pshufhw512_mask(A, 1, C, D)
+#define __builtin_ia32_pshuflw128_mask(A, E, C, D) __builtin_ia32_pshuflw128_mask(A, 1, C, D)
+#define __builtin_ia32_pshuflw256_mask(A, E, C, D) __builtin_ia32_pshuflw256_mask(A, 1, C, D)
+#define __builtin_ia32_pshuflw512_mask(A, E, C, D) __builtin_ia32_pshuflw512_mask(A, 1, C, D)
+#define __builtin_ia32_pslldi128_mask(A, E, C, D) __builtin_ia32_pslldi128_mask(A, 1, C, D)
+#define __builtin_ia32_pslldi256_mask(A, E, C, D) __builtin_ia32_pslldi256_mask(A, 1, C, D)
+#define __builtin_ia32_psllqi128_mask(A, E, C, D) __builtin_ia32_psllqi128_mask(A, 1, C, D)
+#define __builtin_ia32_psllqi256_mask(A, E, C, D) __builtin_ia32_psllqi256_mask(A, 1, C, D)
+#define __builtin_ia32_psllwi128_mask(A, E, C, D) __builtin_ia32_psllwi128_mask(A, 1, C, D)
+#define __builtin_ia32_psllwi256_mask(A, E, C, D) __builtin_ia32_psllwi256_mask(A, 1, C, D)
+#define __builtin_ia32_psllwi512_mask(A, E, C, D) __builtin_ia32_psllwi512_mask(A, 1, C, D)
+#define __builtin_ia32_psradi128_mask(A, E, C, D) __builtin_ia32_psradi128_mask(A, 1, C, D)
+#define __builtin_ia32_psradi256_mask(A, E, C, D) __builtin_ia32_psradi256_mask(A, 1, C, D)
+#define __builtin_ia32_psraqi128_mask(A, E, C, D) __builtin_ia32_psraqi128_mask(A, 1, C, D)
+#define __builtin_ia32_psraqi256_mask(A, E, C, D) __builtin_ia32_psraqi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrawi128_mask(A, E, C, D) __builtin_ia32_psrawi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrawi256_mask(A, E, C, D) __builtin_ia32_psrawi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrawi512_mask(A, E, C, D) __builtin_ia32_psrawi512_mask(A, 1, C, D)
+#define __builtin_ia32_psrldi128_mask(A, E, C, D) __builtin_ia32_psrldi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrldi256_mask(A, E, C, D) __builtin_ia32_psrldi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrlqi128_mask(A, E, C, D) __builtin_ia32_psrlqi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrlqi256_mask(A, E, C, D) __builtin_ia32_psrlqi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrlwi128_mask(A, E, C, D) __builtin_ia32_psrlwi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrlwi256_mask(A, E, C, D) __builtin_ia32_psrlwi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrlwi512_mask(A, E, C, D) __builtin_ia32_psrlwi512_mask(A, 1, C, D)
+#define __builtin_ia32_pternlogd128_mask(A, B, C, F, E) __builtin_ia32_pternlogd128_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd128_maskz(A, B, C, F, E) __builtin_ia32_pternlogd128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd256_mask(A, B, C, F, E) __builtin_ia32_pternlogd256_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd256_maskz(A, B, C, F, E) __builtin_ia32_pternlogd256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq128_mask(A, B, C, F, E) __builtin_ia32_pternlogq128_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq128_maskz(A, B, C, F, E) __builtin_ia32_pternlogq128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq256_mask(A, B, C, F, E) __builtin_ia32_pternlogq256_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq256_maskz(A, B, C, F, E) __builtin_ia32_pternlogq256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_rangepd128_mask(A, B, F, D, E) __builtin_ia32_rangepd128_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangepd256_mask(A, B, F, D, E) __builtin_ia32_rangepd256_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangepd512_mask(A, B, I, D, E, F) __builtin_ia32_rangepd512_mask(A, B, 1, D, E, 8)
+#define __builtin_ia32_rangeps128_mask(A, B, F, D, E) __builtin_ia32_rangeps128_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangeps256_mask(A, B, F, D, E) __builtin_ia32_rangeps256_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangeps512_mask(A, B, I, D, E, F) __builtin_ia32_rangeps512_mask(A, B, 1, D, E, 8)
+#define __builtin_ia32_rangesd128_round(A, B, I, F) __builtin_ia32_rangesd128_round(A, B, 1, 8)
+#define __builtin_ia32_rangess128_round(A, B, I, F) __builtin_ia32_rangess128_round(A, B, 1, 8)
+#define __builtin_ia32_reducepd128_mask(A, E, C, D) __builtin_ia32_reducepd128_mask(A, 1, C, D)
+#define __builtin_ia32_reducepd256_mask(A, E, C, D) __builtin_ia32_reducepd256_mask(A, 1, C, D)
+#define __builtin_ia32_reducepd512_mask(A, E, C, D) __builtin_ia32_reducepd512_mask(A, 1, C, D)
+#define __builtin_ia32_reduceps128_mask(A, E, C, D) __builtin_ia32_reduceps128_mask(A, 1, C, D)
+#define __builtin_ia32_reduceps256_mask(A, E, C, D) __builtin_ia32_reduceps256_mask(A, 1, C, D)
+#define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
+#define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1)
+#define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1)
+#define __builtin_ia32_rndscalepd_128_mask(A, E, C, D) __builtin_ia32_rndscalepd_128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalepd_256_mask(A, E, C, D) __builtin_ia32_rndscalepd_256_mask(A, 1, C, D)
+#define __builtin_ia32_rndscaleps_128_mask(A, E, C, D) __builtin_ia32_rndscaleps_128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscaleps_256_mask(A, E, C, D) __builtin_ia32_rndscaleps_256_mask(A, 1, C, D)
+#define __builtin_ia32_scatterdiv2df(A, B, C, D, F) __builtin_ia32_scatterdiv2df(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv2di(A, B, C, D, F) __builtin_ia32_scatterdiv2di(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4df(A, B, C, D, F) __builtin_ia32_scatterdiv4df(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4di(A, B, C, D, F) __builtin_ia32_scatterdiv4di(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4sf(A, B, C, D, F) __builtin_ia32_scatterdiv4sf(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4si(A, B, C, D, F) __builtin_ia32_scatterdiv4si(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8sf(A, B, C, D, F) __builtin_ia32_scatterdiv8sf(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8si(A, B, C, D, F) __builtin_ia32_scatterdiv8si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv2df(A, B, C, D, F) __builtin_ia32_scattersiv2df(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv2di(A, B, C, D, F) __builtin_ia32_scattersiv2di(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4df(A, B, C, D, F) __builtin_ia32_scattersiv4df(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4di(A, B, C, D, F) __builtin_ia32_scattersiv4di(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4sf(A, B, C, D, F) __builtin_ia32_scattersiv4sf(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4si(A, B, C, D, F) __builtin_ia32_scattersiv4si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8sf(A, B, C, D, F) __builtin_ia32_scattersiv8sf(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8si(A, B, C, D, F) __builtin_ia32_scattersiv8si(A, B, C, D, 1)
+#define __builtin_ia32_shuf_f32x4_256_mask(A, B, F, D, E) __builtin_ia32_shuf_f32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_f64x2_256_mask(A, B, F, D, E) __builtin_ia32_shuf_f64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i32x4_256_mask(A, B, F, D, E) __builtin_ia32_shuf_i32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i64x2_256_mask(A, B, F, D, E) __builtin_ia32_shuf_i64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufpd128_mask(A, B, F, D, E) __builtin_ia32_shufpd128_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufpd256_mask(A, B, F, D, E) __builtin_ia32_shufpd256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufps128_mask(A, B, F, D, E) __builtin_ia32_shufps128_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufps256_mask(A, B, F, D, E) __builtin_ia32_shufps256_mask(A, B, 1, D, E)
+#define __builtin_ia32_ucmpb128_mask(A, B, E, D) __builtin_ia32_ucmpb128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpb256_mask(A, B, E, D) __builtin_ia32_ucmpb256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpd128_mask(A, B, E, D) __builtin_ia32_ucmpd128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpd256_mask(A, B, E, D) __builtin_ia32_ucmpd256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpq128_mask(A, B, E, D) __builtin_ia32_ucmpq128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpq256_mask(A, B, E, D) __builtin_ia32_ucmpq256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpw128_mask(A, B, E, D) __builtin_ia32_ucmpw128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpw256_mask(A, B, E, D) __builtin_ia32_ucmpw256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D)
+#define __builtin_ia32_vcvtps2ph256_mask(A, E, C, D) __builtin_ia32_vcvtps2ph256_mask(A, 1, C, D)
+#define __builtin_ia32_vcvtps2ph_mask(A, E, C, D) __builtin_ia32_vcvtps2ph_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilpd256_mask(A, E, C, D) __builtin_ia32_vpermilpd256_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilpd_mask(A, E, C, D) __builtin_ia32_vpermilpd_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilps256_mask(A, E, C, D) __builtin_ia32_vpermilps256_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilps_mask(A, E, C, D) __builtin_ia32_vpermilps_mask(A, 1, C, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 2641b801a03..28475c44302 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl" } */
#include <mm_malloc.h>
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 26b805d4a41..a0098d4ece2 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -99,7 +99,7 @@
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq")
#endif
/* Following intrinsics require immediate arguments. They
@@ -214,7 +214,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index fe61e17df97..46ef77b5acc 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -385,7 +385,211 @@
/* shaintrin.h */
#define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt")
+/* TODO split later */
+#define __builtin_ia32_pslldq512(A, B) __builtin_ia32_pslldq512(A, 8)
+#define __builtin_ia32_psrldq512(A, B) __builtin_ia32_psrldq512(A, 8)
+#define __builtin_ia32_alignd128_mask(A, B, F, D, E) __builtin_ia32_alignd128_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignd256_mask(A, B, F, D, E) __builtin_ia32_alignd256_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignq128_mask(A, B, F, D, E) __builtin_ia32_alignq128_mask(A, B, 1, D, E)
+#define __builtin_ia32_alignq256_mask(A, B, F, D, E) __builtin_ia32_alignq256_mask(A, B, 1, D, E)
+#define __builtin_ia32_cmpb128_mask(A, B, E, D) __builtin_ia32_cmpb128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpb256_mask(A, B, E, D) __builtin_ia32_cmpb256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpb512_mask(A, B, E, D) __builtin_ia32_cmpb512_mask(A, B, 1, D)
+#define __builtin_ia32_cmpd128_mask(A, B, E, D) __builtin_ia32_cmpd128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpd256_mask(A, B, E, D) __builtin_ia32_cmpd256_mask(A, B, 1, D)
+#define __builtin_ia32_cmppd128_mask(A, B, E, D) __builtin_ia32_cmppd128_mask(A, B, 1, D)
+#define __builtin_ia32_cmppd256_mask(A, B, E, D) __builtin_ia32_cmppd256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpps128_mask(A, B, E, D) __builtin_ia32_cmpps128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpps256_mask(A, B, E, D) __builtin_ia32_cmpps256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpq128_mask(A, B, E, D) __builtin_ia32_cmpq128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpq256_mask(A, B, E, D) __builtin_ia32_cmpq256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpw128_mask(A, B, E, D) __builtin_ia32_cmpw128_mask(A, B, 1, D)
+#define __builtin_ia32_cmpw256_mask(A, B, E, D) __builtin_ia32_cmpw256_mask(A, B, 1, D)
+#define __builtin_ia32_cmpw512_mask(A, B, E, D) __builtin_ia32_cmpw512_mask(A, B, 1, D)
+#define __builtin_ia32_cvtpd2qq512_mask(A, B, C, D) __builtin_ia32_cvtpd2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtpd2uqq512_mask(A, B, C, D) __builtin_ia32_cvtpd2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtps2qq512_mask(A, B, C, D) __builtin_ia32_cvtps2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtps2uqq512_mask(A, B, C, D) __builtin_ia32_cvtps2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtqq2pd512_mask(A, B, C, D) __builtin_ia32_cvtqq2pd512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtqq2ps512_mask(A, B, C, D) __builtin_ia32_cvtqq2ps512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttpd2qq512_mask(A, B, C, D) __builtin_ia32_cvttpd2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttpd2uqq512_mask(A, B, C, D) __builtin_ia32_cvttpd2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttps2qq512_mask(A, B, C, D) __builtin_ia32_cvttps2qq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvttps2uqq512_mask(A, B, C, D) __builtin_ia32_cvttps2uqq512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtuqq2pd512_mask(A, B, C, D) __builtin_ia32_cvtuqq2pd512_mask(A, B, C, 8)
+#define __builtin_ia32_cvtuqq2ps512_mask(A, B, C, D) __builtin_ia32_cvtuqq2ps512_mask(A, B, C, 8)
+#define __builtin_ia32_dbpsadbw128_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw128_mask(A, B, 1, D, E)
+#define __builtin_ia32_dbpsadbw256_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw256_mask(A, B, 1, D, E)
+#define __builtin_ia32_dbpsadbw512_mask(A, B, F, D, E) __builtin_ia32_dbpsadbw512_mask(A, B, 1, D, E)
+#define __builtin_ia32_extractf32x4_256_mask(A, E, C, D) __builtin_ia32_extractf32x4_256_mask(A, 1, C, D)
+#define __builtin_ia32_extractf32x8_mask(A, E, C, D) __builtin_ia32_extractf32x8_mask(A, 1, C, D)
+#define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D)
+#define __builtin_ia32_extractf64x2_512_mask(A, E, C, D) __builtin_ia32_extractf64x2_512_mask(A, 1, C, D)
+#define __builtin_ia32_extracti32x4_256_mask(A, E, C, D) __builtin_ia32_extracti32x4_256_mask(A, 1, C, D)
+#define __builtin_ia32_extracti32x8_mask(A, E, C, D) __builtin_ia32_extracti32x8_mask(A, 1, C, D)
+#define __builtin_ia32_extracti64x2_256_mask(A, E, C, D) __builtin_ia32_extracti64x2_256_mask(A, 1, C, D)
+#define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) __builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
+#define __builtin_ia32_fixupimmpd128(A, B, C, E) __builtin_ia32_fixupimmpd128(A, B, C, 1)
+#define __builtin_ia32_fixupimmpd128_mask(A, B, C, F, E) __builtin_ia32_fixupimmpd128_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmpd128_maskz(A, B, C, F, E) __builtin_ia32_fixupimmpd128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmpd256(A, B, C, E) __builtin_ia32_fixupimmpd256(A, B, C, 1)
+#define __builtin_ia32_fixupimmpd256_mask(A, B, C, F, E) __builtin_ia32_fixupimmpd256_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmpd256_maskz(A, B, C, F, E) __builtin_ia32_fixupimmpd256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps128(A, B, C, E) __builtin_ia32_fixupimmps128(A, B, C, 1)
+#define __builtin_ia32_fixupimmps128_mask(A, B, C, F, E) __builtin_ia32_fixupimmps128_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps128_maskz(A, B, C, F, E) __builtin_ia32_fixupimmps128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps256(A, B, C, E) __builtin_ia32_fixupimmps256(A, B, C, 1)
+#define __builtin_ia32_fixupimmps256_mask(A, B, C, F, E) __builtin_ia32_fixupimmps256_mask(A, B, C, 1, E)
+#define __builtin_ia32_fixupimmps256_maskz(A, B, C, F, E) __builtin_ia32_fixupimmps256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_fpclasspd128_mask(A, D, C) __builtin_ia32_fpclasspd128_mask(A, 1, C)
+#define __builtin_ia32_fpclasspd256_mask(A, D, C) __builtin_ia32_fpclasspd256_mask(A, 1, C)
+#define __builtin_ia32_fpclasspd512_mask(A, D, C) __builtin_ia32_fpclasspd512_mask(A, 1, C)
+#define __builtin_ia32_fpclassps128_mask(A, D, C) __builtin_ia32_fpclassps128_mask(A, 1, C)
+#define __builtin_ia32_fpclassps256_mask(A, D, C) __builtin_ia32_fpclassps256_mask(A, 1, C)
+#define __builtin_ia32_fpclassps512_mask(A, D, C) __builtin_ia32_fpclassps512_mask(A, 1, C)
+#define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
+#define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
+#define __builtin_ia32_gather3div2df(A, B, C, D, F) __builtin_ia32_gather3div2df(A, B, C, D, 1)
+#define __builtin_ia32_gather3div2di(A, B, C, D, F) __builtin_ia32_gather3div2di(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4df(A, B, C, D, F) __builtin_ia32_gather3div4df(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4di(A, B, C, D, F) __builtin_ia32_gather3div4di(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4sf(A, B, C, D, F) __builtin_ia32_gather3div4sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3div4si(A, B, C, D, F) __builtin_ia32_gather3div4si(A, B, C, D, 1)
+#define __builtin_ia32_gather3div8sf(A, B, C, D, F) __builtin_ia32_gather3div8sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3div8si(A, B, C, D, F) __builtin_ia32_gather3div8si(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv2df(A, B, C, D, F) __builtin_ia32_gather3siv2df(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv2di(A, B, C, D, F) __builtin_ia32_gather3siv2di(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4df(A, B, C, D, F) __builtin_ia32_gather3siv4df(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4di(A, B, C, D, F) __builtin_ia32_gather3siv4di(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4sf(A, B, C, D, F) __builtin_ia32_gather3siv4sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv4si(A, B, C, D, F) __builtin_ia32_gather3siv4si(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv8sf(A, B, C, D, F) __builtin_ia32_gather3siv8sf(A, B, C, D, 1)
+#define __builtin_ia32_gather3siv8si(A, B, C, D, F) __builtin_ia32_gather3siv8si(A, B, C, D, 1)
+#define __builtin_ia32_getmantpd128_mask(A, E, C, D) __builtin_ia32_getmantpd128_mask(A, 1, C, D)
+#define __builtin_ia32_getmantpd256_mask(A, E, C, D) __builtin_ia32_getmantpd256_mask(A, 1, C, D)
+#define __builtin_ia32_getmantps128_mask(A, E, C, D) __builtin_ia32_getmantps128_mask(A, 1, C, D)
+#define __builtin_ia32_getmantps256_mask(A, E, C, D) __builtin_ia32_getmantps256_mask(A, 1, C, D)
+#define __builtin_ia32_insertf32x4_256_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) __builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf64x2_256_mask(A, B, F, D, E) __builtin_ia32_insertf64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_insertf64x2_512_mask(A, B, F, D, E) __builtin_ia32_insertf64x2_512_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti32x4_256_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti32x8_mask(A, B, F, D, E) __builtin_ia32_inserti32x8_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti64x2_256_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_inserti64x2_512_mask(A, B, F, D, E) __builtin_ia32_inserti64x2_512_mask(A, B, 1, D, E)
+#define __builtin_ia32_palignr128_mask(A, B, F, D, E) __builtin_ia32_palignr128_mask(A, B, 8, D, E)
+#define __builtin_ia32_palignr256_mask(A, B, F, D, E) __builtin_ia32_palignr256_mask(A, B, 8, D, E)
+#define __builtin_ia32_palignr512(A, B, D) __builtin_ia32_palignr512(A, B, 8)
+#define __builtin_ia32_palignr512_mask(A, B, F, D, E) __builtin_ia32_palignr512_mask(A, B, 8, D, E)
+#define __builtin_ia32_permdf256_mask(A, E, C, D) __builtin_ia32_permdf256_mask(A, 1, C, D)
+#define __builtin_ia32_permdi256_mask(A, E, C, D) __builtin_ia32_permdi256_mask(A, 1, C, D)
+#define __builtin_ia32_prold128_mask(A, E, C, D) __builtin_ia32_prold128_mask(A, 1, C, D)
+#define __builtin_ia32_prold256_mask(A, E, C, D) __builtin_ia32_prold256_mask(A, 1, C, D)
+#define __builtin_ia32_prolq128_mask(A, E, C, D) __builtin_ia32_prolq128_mask(A, 1, C, D)
+#define __builtin_ia32_prolq256_mask(A, E, C, D) __builtin_ia32_prolq256_mask(A, 1, C, D)
+#define __builtin_ia32_prord128_mask(A, E, C, D) __builtin_ia32_prord128_mask(A, 1, C, D)
+#define __builtin_ia32_prord256_mask(A, E, C, D) __builtin_ia32_prord256_mask(A, 1, C, D)
+#define __builtin_ia32_prorq128_mask(A, E, C, D) __builtin_ia32_prorq128_mask(A, 1, C, D)
+#define __builtin_ia32_prorq256_mask(A, E, C, D) __builtin_ia32_prorq256_mask(A, 1, C, D)
+#define __builtin_ia32_pshufd128_mask(A, E, C, D) __builtin_ia32_pshufd128_mask(A, 1, C, D)
+#define __builtin_ia32_pshufd256_mask(A, E, C, D) __builtin_ia32_pshufd256_mask(A, 1, C, D)
+#define __builtin_ia32_pshufhw128_mask(A, E, C, D) __builtin_ia32_pshufhw128_mask(A, 1, C, D)
+#define __builtin_ia32_pshufhw256_mask(A, E, C, D) __builtin_ia32_pshufhw256_mask(A, 1, C, D)
+#define __builtin_ia32_pshufhw512_mask(A, E, C, D) __builtin_ia32_pshufhw512_mask(A, 1, C, D)
+#define __builtin_ia32_pshuflw128_mask(A, E, C, D) __builtin_ia32_pshuflw128_mask(A, 1, C, D)
+#define __builtin_ia32_pshuflw256_mask(A, E, C, D) __builtin_ia32_pshuflw256_mask(A, 1, C, D)
+#define __builtin_ia32_pshuflw512_mask(A, E, C, D) __builtin_ia32_pshuflw512_mask(A, 1, C, D)
+#define __builtin_ia32_pslldi128_mask(A, E, C, D) __builtin_ia32_pslldi128_mask(A, 1, C, D)
+#define __builtin_ia32_pslldi256_mask(A, E, C, D) __builtin_ia32_pslldi256_mask(A, 1, C, D)
+#define __builtin_ia32_psllqi128_mask(A, E, C, D) __builtin_ia32_psllqi128_mask(A, 1, C, D)
+#define __builtin_ia32_psllqi256_mask(A, E, C, D) __builtin_ia32_psllqi256_mask(A, 1, C, D)
+#define __builtin_ia32_psllwi128_mask(A, E, C, D) __builtin_ia32_psllwi128_mask(A, 1, C, D)
+#define __builtin_ia32_psllwi256_mask(A, E, C, D) __builtin_ia32_psllwi256_mask(A, 1, C, D)
+#define __builtin_ia32_psllwi512_mask(A, E, C, D) __builtin_ia32_psllwi512_mask(A, 1, C, D)
+#define __builtin_ia32_psradi128_mask(A, E, C, D) __builtin_ia32_psradi128_mask(A, 1, C, D)
+#define __builtin_ia32_psradi256_mask(A, E, C, D) __builtin_ia32_psradi256_mask(A, 1, C, D)
+#define __builtin_ia32_psraqi128_mask(A, E, C, D) __builtin_ia32_psraqi128_mask(A, 1, C, D)
+#define __builtin_ia32_psraqi256_mask(A, E, C, D) __builtin_ia32_psraqi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrawi128_mask(A, E, C, D) __builtin_ia32_psrawi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrawi256_mask(A, E, C, D) __builtin_ia32_psrawi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrawi512_mask(A, E, C, D) __builtin_ia32_psrawi512_mask(A, 1, C, D)
+#define __builtin_ia32_psrldi128_mask(A, E, C, D) __builtin_ia32_psrldi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrldi256_mask(A, E, C, D) __builtin_ia32_psrldi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrlqi128_mask(A, E, C, D) __builtin_ia32_psrlqi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrlqi256_mask(A, E, C, D) __builtin_ia32_psrlqi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrlwi128_mask(A, E, C, D) __builtin_ia32_psrlwi128_mask(A, 1, C, D)
+#define __builtin_ia32_psrlwi256_mask(A, E, C, D) __builtin_ia32_psrlwi256_mask(A, 1, C, D)
+#define __builtin_ia32_psrlwi512_mask(A, E, C, D) __builtin_ia32_psrlwi512_mask(A, 1, C, D)
+#define __builtin_ia32_pternlogd128_mask(A, B, C, F, E) __builtin_ia32_pternlogd128_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd128_maskz(A, B, C, F, E) __builtin_ia32_pternlogd128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd256_mask(A, B, C, F, E) __builtin_ia32_pternlogd256_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogd256_maskz(A, B, C, F, E) __builtin_ia32_pternlogd256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq128_mask(A, B, C, F, E) __builtin_ia32_pternlogq128_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq128_maskz(A, B, C, F, E) __builtin_ia32_pternlogq128_maskz(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq256_mask(A, B, C, F, E) __builtin_ia32_pternlogq256_mask(A, B, C, 1, E)
+#define __builtin_ia32_pternlogq256_maskz(A, B, C, F, E) __builtin_ia32_pternlogq256_maskz(A, B, C, 1, E)
+#define __builtin_ia32_rangepd128_mask(A, B, F, D, E) __builtin_ia32_rangepd128_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangepd256_mask(A, B, F, D, E) __builtin_ia32_rangepd256_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangepd512_mask(A, B, I, D, E, F) __builtin_ia32_rangepd512_mask(A, B, 1, D, E, 8)
+#define __builtin_ia32_rangeps128_mask(A, B, F, D, E) __builtin_ia32_rangeps128_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangeps256_mask(A, B, F, D, E) __builtin_ia32_rangeps256_mask(A, B, 1, D, E)
+#define __builtin_ia32_rangeps512_mask(A, B, I, D, E, F) __builtin_ia32_rangeps512_mask(A, B, 1, D, E, 8)
+#define __builtin_ia32_rangesd128_round(A, B, I, F) __builtin_ia32_rangesd128_round(A, B, 1, 8)
+#define __builtin_ia32_rangess128_round(A, B, I, F) __builtin_ia32_rangess128_round(A, B, 1, 8)
+#define __builtin_ia32_reducepd128_mask(A, E, C, D) __builtin_ia32_reducepd128_mask(A, 1, C, D)
+#define __builtin_ia32_reducepd256_mask(A, E, C, D) __builtin_ia32_reducepd256_mask(A, 1, C, D)
+#define __builtin_ia32_reducepd512_mask(A, E, C, D) __builtin_ia32_reducepd512_mask(A, 1, C, D)
+#define __builtin_ia32_reduceps128_mask(A, E, C, D) __builtin_ia32_reduceps128_mask(A, 1, C, D)
+#define __builtin_ia32_reduceps256_mask(A, E, C, D) __builtin_ia32_reduceps256_mask(A, 1, C, D)
+#define __builtin_ia32_reduceps512_mask(A, E, C, D) __builtin_ia32_reduceps512_mask(A, 1, C, D)
+#define __builtin_ia32_reducesd(A, B, F) __builtin_ia32_reducesd(A, B, 1)
+#define __builtin_ia32_reducess(A, B, F) __builtin_ia32_reducess(A, B, 1)
+#define __builtin_ia32_rndscalepd_128_mask(A, E, C, D) __builtin_ia32_rndscalepd_128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscalepd_256_mask(A, E, C, D) __builtin_ia32_rndscalepd_256_mask(A, 1, C, D)
+#define __builtin_ia32_rndscaleps_128_mask(A, E, C, D) __builtin_ia32_rndscaleps_128_mask(A, 1, C, D)
+#define __builtin_ia32_rndscaleps_256_mask(A, E, C, D) __builtin_ia32_rndscaleps_256_mask(A, 1, C, D)
+#define __builtin_ia32_scatterdiv2df(A, B, C, D, F) __builtin_ia32_scatterdiv2df(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv2di(A, B, C, D, F) __builtin_ia32_scatterdiv2di(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4df(A, B, C, D, F) __builtin_ia32_scatterdiv4df(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4di(A, B, C, D, F) __builtin_ia32_scatterdiv4di(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4sf(A, B, C, D, F) __builtin_ia32_scatterdiv4sf(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv4si(A, B, C, D, F) __builtin_ia32_scatterdiv4si(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8sf(A, B, C, D, F) __builtin_ia32_scatterdiv8sf(A, B, C, D, 1)
+#define __builtin_ia32_scatterdiv8si(A, B, C, D, F) __builtin_ia32_scatterdiv8si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv2df(A, B, C, D, F) __builtin_ia32_scattersiv2df(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv2di(A, B, C, D, F) __builtin_ia32_scattersiv2di(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4df(A, B, C, D, F) __builtin_ia32_scattersiv4df(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4di(A, B, C, D, F) __builtin_ia32_scattersiv4di(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4sf(A, B, C, D, F) __builtin_ia32_scattersiv4sf(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv4si(A, B, C, D, F) __builtin_ia32_scattersiv4si(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8sf(A, B, C, D, F) __builtin_ia32_scattersiv8sf(A, B, C, D, 1)
+#define __builtin_ia32_scattersiv8si(A, B, C, D, F) __builtin_ia32_scattersiv8si(A, B, C, D, 1)
+#define __builtin_ia32_shuf_f32x4_256_mask(A, B, F, D, E) __builtin_ia32_shuf_f32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_f64x2_256_mask(A, B, F, D, E) __builtin_ia32_shuf_f64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i32x4_256_mask(A, B, F, D, E) __builtin_ia32_shuf_i32x4_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shuf_i64x2_256_mask(A, B, F, D, E) __builtin_ia32_shuf_i64x2_256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufpd128_mask(A, B, F, D, E) __builtin_ia32_shufpd128_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufpd256_mask(A, B, F, D, E) __builtin_ia32_shufpd256_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufps128_mask(A, B, F, D, E) __builtin_ia32_shufps128_mask(A, B, 1, D, E)
+#define __builtin_ia32_shufps256_mask(A, B, F, D, E) __builtin_ia32_shufps256_mask(A, B, 1, D, E)
+#define __builtin_ia32_ucmpb128_mask(A, B, E, D) __builtin_ia32_ucmpb128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpb256_mask(A, B, E, D) __builtin_ia32_ucmpb256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpb512_mask(A, B, E, D) __builtin_ia32_ucmpb512_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpd128_mask(A, B, E, D) __builtin_ia32_ucmpd128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpd256_mask(A, B, E, D) __builtin_ia32_ucmpd256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpq128_mask(A, B, E, D) __builtin_ia32_ucmpq128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpq256_mask(A, B, E, D) __builtin_ia32_ucmpq256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpw128_mask(A, B, E, D) __builtin_ia32_ucmpw128_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpw256_mask(A, B, E, D) __builtin_ia32_ucmpw256_mask(A, B, 1, D)
+#define __builtin_ia32_ucmpw512_mask(A, B, E, D) __builtin_ia32_ucmpw512_mask(A, B, 1, D)
+#define __builtin_ia32_vcvtps2ph256_mask(A, E, C, D) __builtin_ia32_vcvtps2ph256_mask(A, 1, C, D)
+#define __builtin_ia32_vcvtps2ph_mask(A, E, C, D) __builtin_ia32_vcvtps2ph_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilpd256_mask(A, E, C, D) __builtin_ia32_vpermilpd256_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilpd_mask(A, E, C, D) __builtin_ia32_vpermilpd_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilps256_mask(A, E, C, D) __builtin_ia32_vpermilps256_mask(A, 1, C, D)
+#define __builtin_ia32_vpermilps_mask(A, E, C, D) __builtin_ia32_vpermilps_mask(A, 1, C, D)
+
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl")
#include <wmmintrin.h>
#include <smmintrin.h>
#include <mm3dnow.h>
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index 34c48fa1c99..fe2e62b7178 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1526,7 +1526,7 @@ struct GTY(()) tree_function_decl {
DECL_FUNCTION_CODE. Otherwise unused.
??? The bitfield needs to be able to hold all target function
codes as well. */
- ENUM_BITFIELD(built_in_function) function_code : 11;
+ ENUM_BITFIELD(built_in_function) function_code : 12;
ENUM_BITFIELD(built_in_class) built_in_class : 2;
unsigned static_ctor_flag : 1;