Diffstat (limited to 'gcc/config/i386')
-rw-r--r--  gcc/config/i386/avx512bwintrin.h | 2658
-rw-r--r--  gcc/config/i386/avx512dqintrin.h | 2308
-rw-r--r--  gcc/config/i386/avx512vlbwintrin.h | 4218
-rw-r--r--  gcc/config/i386/avx512vldqintrin.h | 2035
-rw-r--r--  gcc/config/i386/avx512vlintrin.h | 13213
-rw-r--r--  gcc/config/i386/cpuid.h | 3
-rw-r--r--  gcc/config/i386/driver-i386.c | 10
-rw-r--r--  gcc/config/i386/i386-builtin-types.def | 453
-rw-r--r--  gcc/config/i386/i386-c.c | 6
-rw-r--r--  gcc/config/i386/i386-modes.def | 3
-rw-r--r--  gcc/config/i386/i386.c | 3066
-rw-r--r--  gcc/config/i386/i386.h | 19
-rw-r--r--  gcc/config/i386/i386.md | 287
-rw-r--r--  gcc/config/i386/i386.opt | 12
-rw-r--r--  gcc/config/i386/immintrin.h | 10
-rw-r--r--  gcc/config/i386/sse.md | 4953
-rw-r--r--  gcc/config/i386/subst.md | 51
17 files changed, 31689 insertions, 1616 deletions
diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
new file mode 100644
index 00000000000..b5caab956b5
--- /dev/null
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -0,0 +1,2658 @@
+/* Copyright (C) 2013
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BWINTRIN_H_INCLUDED
+#define _AVX512BWINTRIN_H_INCLUDED
+
+#ifndef __AVX512BW__
+#pragma GCC push_options
+#pragma GCC target("avx512bw")
+#define __DISABLE_AVX512BW__
+#endif /* __AVX512BW__ */
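+
+/* When a translation unit is not compiled with -mavx512bw, the
+ pragmas above temporarily enable AVX512BW code generation for this
+ header; __DISABLE_AVX512BW__ records that the target options must
+ be popped again at the end of the file.  */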
+
+/* Internal data types for implementing the intrinsics. */
+typedef short __v32hi __attribute__ ((__vector_size__ (64)));
+typedef char __v64qi __attribute__ ((__vector_size__ (64)));
+
+typedef unsigned long long __mmask64;
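+
+/* Masks carry one bit per lane: a 512-bit vector holds 64 bytes or
+ 32 words, so the byte operations below take a 64-bit mask while the
+ word operations take the 32-bit __mmask32 from the AVX-512 core
+ headers.  */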
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_qi (void)
+{
+ return __extension__ (__m512i)(__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_setzero_hi (void)
+{
+ return __extension__ (__m512i)(__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
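+
+/* Convention for the masked forms below: _mm512_mask_* merges, i.e.
+ result lanes whose mask bit is clear keep the value of the first
+ operand __W, while _mm512_maskz_* zeroes such lanes.  E.g. with
+ r = _mm512_mask_mov_epi16 (w, (__mmask32) 0x5, a);
+ lanes 0 and 2 of r come from a and every other lane comes from w.  */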
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_movdquhi512_mask ((__v32hi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_movdquhi512_mask ((__v32hi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
+{
+ __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
+ (__v32hi) __A,
+ (__mmask32) __U);
+}
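+
+/* The masked loads and stores access only lanes whose mask bit is
+ set, which makes them convenient for array tails, e.g.
+ (illustrative, for a remainder of n < 32 elements):
+ __mmask32 __tail = (__mmask32) ((1u << n) - 1);
+ __m512i __v = _mm512_maskz_loadu_epi16 (__tail, __src);
+ _mm512_mask_storeu_epi16 (__dst, __tail, __v);  */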
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_movdquqi512_mask ((__v64qi) __A,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_movdquqi512_mask ((__v64qi) __A,
+ (__v64qi)
+ _mm512_setzero_hi (),
+ (__mmask64) __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
+{
+ return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
+ (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
+{
+ return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
+ (__mmask64) __B);
+}
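+
+/* _mm512_kunpackw forms (__A & 0xffff) << 16 | (__B & 0xffff);
+ _mm512_kunpackd likewise concatenates the low 32 bits of each
+ 64-bit mask operand.  */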
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
+{
+ return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+ (__v64qi)
+ _mm512_setzero_hi (),
+ (__mmask64) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
+{
+ __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
+ (__v64qi) __A,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sad_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
+ (__v64qi) __B);
+}
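+
+/* VPSADBW produces, per 64-bit chunk, the sum of absolute
+ differences of eight unsigned byte pairs as a zero-extended 64-bit
+ value; _mm512_sad_epu8 (__v, _mm512_setzero_qi ()) is therefore a
+ common idiom for horizontal byte sums.  */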
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi16_epi8 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi) __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsepi16_epi8 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
+ (__v32qi) __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtusepi16_epi8 (__m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
+ (__v32qi) __O,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A)
+{
+ return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
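+
+/* The three 16-to-8-bit down-conversions differ only in how
+ out-of-range words are handled: _mm512_cvtepi16_epi8 truncates,
+ _mm512_cvtsepi16_epi8 saturates to [-128, 127] and
+ _mm512_cvtusepi16_epi8 saturates to [0, 255]; the word 384 becomes
+ 0x80, 0x7f and 0xff respectively.  */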
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastb_epi8 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
+ (__v64qi) __O,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
+ (__v64qi) __O,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastb512_gpr_mask (__A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcastw_epi16 (__m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
+ (__v32hi) __O,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
+ (__v32hi) __O,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
+{
+ return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ __M);
+}
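+
+/* The set1 forms broadcast a scalar straight from a general-purpose
+ register under a mask, e.g. (illustrative) writing the space
+ character into just the lanes selected by __M:
+ r = _mm512_mask_set1_epi8 (src, __M, 0x20);  */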
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mulhrs_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mulhrs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mulhrs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhrsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mulhi_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mulhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mulhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mulhi_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mulhi_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmulhuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mullo_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
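+
+/* Word multiplies: _mm512_mullo_epi16 keeps the low 16 bits of each
+ 32-bit product, _mm512_mulhi_epi16 and _mm512_mulhi_epu16 keep the
+ high 16 bits of the signed and unsigned products, and
+ _mm512_mulhrs_epi16 computes (((a * b) >> 14) + 1) >> 1, the
+ rounded high half of a Q15 fixed-point multiply.  */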
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi8_epi16 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi8_epi16 (__m512i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi8_epi16 (__mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovsxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu8_epi16 (__m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu8_epi16 (__m512i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu8_epi16 (__mmask32 __U, __m256i __A)
+{
+ return (__m512i) __builtin_ia32_pmovzxbw512_mask ((__v32qi) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
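+
+/* Sign- versus zero-extension of 32 bytes to 32 words: for the
+ input byte 0x80, _mm512_cvtepi8_epi16 yields 0xff80 (-128) while
+ _mm512_cvtepu8_epi16 yields 0x0080 (128).  */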
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_permutex2var_epi16 (__m512i __A, __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I
+ /* idx */ ,
+ (__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_permutex2var_epi16 (__m512i __A, __mmask32 __U,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varhi512_mask ((__v32hi) __I
+ /* idx */ ,
+ (__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask2_permutex2var_epi16 (__m512i __A, __m512i __I,
+ __mmask32 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermi2varhi512_mask ((__v32hi) __A,
+ (__v32hi) __I
+ /* idx */ ,
+ (__v32hi) __B,
+ (__mmask32)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_permutex2var_epi16 (__mmask32 __U, __m512i __A,
+ __m512i __I, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpermt2varhi512_maskz ((__v32hi) __I
+ /* idx */ ,
+ (__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32)
+ __U);
+}
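+
+/* In the two-source permutes each word of the index operand __I
+ selects one of the 64 words of the concatenated sources: bits 4:0
+ pick the lane and bit 5 picks the vector.  The mask, mask2 and
+ maskz variants differ only in whether masked-out result lanes are
+ taken from __A, taken from __I, or zeroed.  */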
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_avg_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_avg_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_subs_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_subs_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_adds_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_adds_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_subs_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_subs_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_adds_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_adds_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
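+
+/* The adds and subs forms saturate instead of wrapping: e.g.
+ _mm512_adds_epu8 clamps 200 + 100 to 255 and _mm512_subs_epu16
+ clamps 5 - 9 to 0.  _mm512_avg_epu8 and _mm512_avg_epu16 compute
+ the rounding average (a + b + 1) >> 1 without intermediate
+ overflow.  */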
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srl_epi16 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srl_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srl_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_packs_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sll_epi16 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sll_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sll_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
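+
+/* _mm512_sll_epi16 and _mm512_srl_epi16 shift every word by the
+ single count held in the low 64 bits of __B; a count greater than
+ 15 zeroes all lanes.  */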
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maddubs_epi16 (__m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_maddubs_epi16 (__m512i __W, __mmask32 __U, __m512i __X,
+ __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_maddubs_epi16 (__mmask32 __U, __m512i __X, __m512i __Y)
+{
+ return (__m512i) __builtin_ia32_pmaddubsw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_madd_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_madd_epi16 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_madd_epi16 (__mmask16 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaddwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
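+
+/* _mm512_maddubs_epi16 multiplies unsigned bytes of __X by signed
+ bytes of __Y and adds horizontal pairs with signed saturation into
+ words; _mm512_madd_epi16 multiplies signed words and adds pairs
+ into 32-bit integers, which is why it takes a 16-bit mask.  */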
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpackhi_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpackhi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpackhi_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpckhwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_unpacklo_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_unpacklo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_unpacklo_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_punpcklwd512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
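+
+/* As with their SSE and AVX2 counterparts, the unpack intrinsics
+ interleave within each 128-bit lane rather than across the whole
+ 512-bit register.  */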
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_epi8_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_pcmpeqb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_pcmpeqb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpeq_epi16_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpeqw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpeq_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpeqw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ __U);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpgt_epi8_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_pcmpgtb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpgt_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_pcmpgtb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmpgt_epi16_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpgtw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmpgt_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpgtw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ __U);
+}
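+
+/* The comparisons return a mask rather than a vector, so their
+ result can feed the mask operand of other intrinsics directly,
+ e.g. (illustrative) keeping only the strictly positive bytes of v:
+ m = _mm512_cmpgt_epi8_mask (v, _mm512_setzero_qi ());
+ r = _mm512_maskz_mov_epi8 (m, v);  */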
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movepi8_mask (__m512i __A)
+{
+ return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movepi16_mask (__m512i __A)
+{
+ return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movm_epi8 (__mmask64 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movm_epi16 (__mmask32 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_test_epi8_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestmb512 ((__v64qi) __A,
+ (__v64qi) __B, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_test_epi16_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmw512 ((__v32hi) __A,
+ (__v32hi) __B, __U);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_testn_epi8_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_ptestnmb512 ((__v64qi) __A,
+ (__v64qi) __B, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_testn_epi16_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmw512 ((__v32hi) __A,
+ (__v32hi) __B, __U);
+}
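+
+/* _mm512_movepi8_mask collects the sign bit of every byte into a
+ mask and _mm512_movm_epi8 expands a mask back into 0x00/0xff
+ bytes.  The test forms set a mask bit where (__A & __B) is
+ nonzero; the testn forms set it where that AND is zero.  */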
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shuffle_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shuffle_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shuffle_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pshufb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
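+
+/* _mm512_shuffle_epi8 applies the PSHUFB byte shuffle independently
+ to each 128-bit lane; a selector byte with its high bit set zeroes
+ the corresponding destination byte.  */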
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epu8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminub512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_epi8 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pminsb512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxsw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_epu16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmaxuw512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sra_epi16 (__m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sra_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sra_epi16 (__mmask32 __U, __m512i __A, __m128i __B)
+{
+ return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A,
+ (__v8hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srav_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srav_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srav_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrav32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srlv_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srlv_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srlv_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psrlv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sllv_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sllv_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_psllv32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
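+
+/* The srav, srlv and sllv forms take a separate shift count per
+ 16-bit lane from __B, unlike the single scalar count used by
+ _mm512_sra_epi16, _mm512_srl_epi16 and _mm512_sll_epi16.  */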
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_packs_epi16 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_packs_epi16 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packsswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_packus_epi16 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_packus_epi16 (__m512i __W, __mmask64 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi) __W,
+ (__mmask64) __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_packus_epi16 (__mmask64 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packuswb512_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __M);
+}
+
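+/* Absolute value of each 8-bit (vpabsb) or 16-bit (vpabsw)
+   element.  */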
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_epi8 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsb512_mask ((__v64qi) __A,
+ (__v64qi)
+ _mm512_setzero_qi (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_abs_epi16 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_pabsw512_mask ((__v32hi) __A,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
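+/* The remaining intrinsics take a compile-time constant.  When
+   optimizing they are defined as always-inline functions, so the
+   argument folds to the immediate the instruction requires;
+   otherwise macro versions are used.  */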
+#ifdef __OPTIMIZE__
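+/* Concatenate each 128-bit lane of __A with the corresponding lane
+   of __B and shift the pair right by __N bytes; the builtin takes
+   the count in bits, hence __N * 8.  */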
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_alignr_epi8 (__m512i __A, __m512i __B, const int __N)
+{
+ return (__m512i) __builtin_ia32_palignr512 ((__v8di) __A,
+ (__v8di) __B, __N * 8);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_alignr_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
+ __m512i __B, const int __N)
+{
+ return (__m512i) __builtin_ia32_palignr512_mask ((__v8di) __A,
+ (__v8di) __B,
+ __N * 8,
+ (__v8di) __W,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_alignr_epi8 (__mmask64 __U, __m512i __A, __m512i __B,
+ const int __N)
+{
+ return (__m512i) __builtin_ia32_palignr512_mask ((__v8di) __A,
+ (__v8di) __B,
+ __N * 8,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
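+/* Double-block packed sum of absolute differences (vdbpsadbw):
+   unsigned bytes of __A are compared against __B after the 32-bit
+   blocks of __B are shuffled within each 128-bit lane by __imm,
+   producing packed 16-bit sums.  */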
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dbsad_epu8 (__m512i __A, __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dbsad_epu8 (__m512i __W, __mmask32 __U, __m512i __A,
+ __m512i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ __imm,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dbsad_epu8 (__mmask32 __U, __m512i __A, __m512i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
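+/* Logical right (vpsrlw) and left (vpsllw) shifts of 16-bit
+   elements by the immediate count.  */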
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srli_epi16 (__m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srli_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srli_epi16 (__mmask32 __U, __m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_slli_epi16 (__m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_slli_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ const int __B)
+{
+ return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_slli_epi16 (__mmask32 __U, __m512i __A, const int __B)
+{
+ return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
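+/* Shuffle the upper (vpshufhw) or lower (vpshuflw) four 16-bit
+   elements of each 128-bit lane according to __imm; the other four
+   elements pass through unchanged.  */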
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shufflehi_epi16 (__m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shufflehi_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shufflehi_epi16 (__mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_shufflelo_epi16 (__m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_shufflelo_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_shufflelo_epi16 (__mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi) __A,
+ __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
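+/* Arithmetic right shift of 16-bit elements by the immediate count
+   (vpsraw).  */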
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_srai_epi16 (__m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_srai_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_srai_epi16 (__mmask32 __U, __m512i __A, const int __imm)
+{
+ return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) __U);
+}
+
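+/* Element-wise select: the result takes its element from __W where
+   the corresponding bit of __U is set and from __A otherwise
+   (vpblendmw/vpblendmb).  */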
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
+{
+ return (__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
+{
+ return (__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) __A,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
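+/* Compare 8-bit or 16-bit elements, signed (vpcmpb/vpcmpw) or
+   unsigned (vpcmpub/vpcmpuw), using the comparison predicate __P
+   (one of the _MM_CMPINT_* constants); each mask bit records one
+   element comparison.  */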
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epi16_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+ const int __P)
+{
+ return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
+ (__v32hi) __Y, __P,
+ (__mmask32) __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epi16_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi) __X,
+ (__v32hi) __Y, __P,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epi8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
+ const int __P)
+{
+ return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
+ (__v64qi) __Y, __P,
+ (__mmask64) __U);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epi8_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
+ (__v64qi) __Y, __P,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epu16_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+ const int __P)
+{
+ return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
+ (__v32hi) __Y, __P,
+ (__mmask32) __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epu16_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi) __X,
+ (__v32hi) __Y, __P,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_epu8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
+ const int __P)
+{
+ return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
+ (__v64qi) __Y, __P,
+ (__mmask64) __U);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_epu8_mask (__m512i __X, __m512i __Y, const int __P)
+{
+ return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
+ (__v64qi) __Y, __P,
+ (__mmask64) -1);
+}
+
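+/* Pack 32-bit elements of __A and __B to 16-bit with signed
+   (vpackssdw) or unsigned (vpackusdw) saturation, interleaving the
+   sources within each 128-bit lane.  */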
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_packs_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_packs_epi32 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_packs_epi32 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packssdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi) __W,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_packus_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi)
+ _mm512_setzero_hi (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_packus_epi32 (__mmask32 __M, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_packus_epi32 (__m512i __W, __mmask32 __M, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_packusdw512_mask ((__v16si) __A,
+ (__v16si) __B,
+ (__v32hi) __W,
+ __M);
+}
+
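+/* Shift each 128-bit lane of __A left (bslli) or right (bsrli) by
+   __N bytes, shifting in zeros; the builtin takes the count in
+   bits.  */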
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_bslli_epi128 (__m512i __A, const int __N)
+{
+ return (__m512i) __builtin_ia32_pslldq512 (__A, __N * 8);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_bsrli_epi128 (__m512i __A, const int __N)
+{
+ return (__m512i) __builtin_ia32_psrldq512 (__A, __N * 8);
+}
+
+#else
+#define _mm512_alignr_epi8(X, Y, N) \
+  ((__m512i) __builtin_ia32_palignr512 ((__v8di)(__m512i)(X), \
+    (__v8di)(__m512i)(Y), \
+    (int)((N) * 8)))
+
+#define _mm512_mask_alignr_epi8(W, U, X, Y, N) \
+  ((__m512i) __builtin_ia32_palignr512_mask ((__v8di)(__m512i)(X), \
+    (__v8di)(__m512i)(Y), (int)((N) * 8), \
+    (__v8di)(__m512i)(W), (__mmask64)(U)))
+
+#define _mm512_maskz_alignr_epi8(U, X, Y, N) \
+  ((__m512i) __builtin_ia32_palignr512_mask ((__v8di)(__m512i)(X), \
+    (__v8di)(__m512i)(Y), (int)((N) * 8), \
+    (__v8di)(__m512i)_mm512_setzero_si512 (), \
+    (__mmask64)(U)))
+
+#define _mm512_dbsad_epu8(X, Y, C) \
+ ((__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)(__m512i) (X), \
+ (__v64qi)(__m512i) (Y), (int) (C), \
+ (__v32hi)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask32)-1))
+
+#define _mm512_mask_dbsad_epu8(W, U, X, Y, C) \
+ ((__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)(__m512i) (X), \
+ (__v64qi)(__m512i) (Y), (int) (C), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U)))
+
+#define _mm512_maskz_dbsad_epu8(U, X, Y, C) \
+ ((__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)(__m512i) (X), \
+ (__v64qi)(__m512i) (Y), (int) (C), \
+ (__v32hi)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask32)(U)))
+
+#define _mm512_srli_epi16(A, B) \
+ ((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)-1))
+
+#define _mm512_mask_srli_epi16(W, U, A, B) \
+ ((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)(__m512i)(W), (__mmask32)(U)))
+
+#define _mm512_maskz_srli_epi16(U, A, B) \
+ ((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)(U)))
+
+#define _mm512_slli_epi16(X, C) \
+ ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
+ (__v32hi)(__m512i)_mm512_setzero_hi(),\
+ (__mmask32)-1))
+
+#define _mm512_mask_slli_epi16(W, U, X, C) \
+ ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
+ (__v32hi)(__m512i)(W),\
+ (__mmask32)(U)))
+
+#define _mm512_maskz_slli_epi16(U, X, C) \
+ ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
+ (__v32hi)(__m512i)_mm512_setzero_hi(),\
+ (__mmask32)(U)))
+
+#define _mm512_shufflehi_epi16(A, B) \
+ ((__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)_mm512_setzero_hi(), \
+ (__mmask32)-1))
+
+#define _mm512_mask_shufflehi_epi16(W, U, A, B) \
+ ((__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U)))
+
+#define _mm512_maskz_shufflehi_epi16(U, A, B) \
+ ((__m512i) __builtin_ia32_pshufhw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)_mm512_setzero_hi(), \
+ (__mmask32)(U)))
+
+#define _mm512_shufflelo_epi16(A, B) \
+ ((__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)_mm512_setzero_hi(), \
+ (__mmask32)-1))
+
+#define _mm512_mask_shufflelo_epi16(W, U, A, B) \
+ ((__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U)))
+
+#define _mm512_maskz_shufflelo_epi16(U, A, B) \
+ ((__m512i) __builtin_ia32_pshuflw512_mask ((__v32hi)(__m512i)(A), (int)(B), \
+ (__v32hi)(__m512i)_mm512_setzero_hi(), \
+ (__mmask32)(U)))
+
+#define _mm512_srai_epi16(A, B) \
+ ((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)-1))
+
+#define _mm512_mask_srai_epi16(W, U, A, B) \
+ ((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)(__m512i)(W), (__mmask32)(U)))
+
+#define _mm512_maskz_srai_epi16(U, A, B) \
+ ((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A), \
+ (int)(B), (__v32hi)_mm512_setzero_hi(), (__mmask32)(U)))
+
+#define _mm512_mask_blend_epi16(__U, __A, __W) \
+ ((__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) (__A), \
+ (__v32hi) (__W), \
+ (__mmask32) (__U)))
+
+#define _mm512_mask_blend_epi8(__U, __A, __W) \
+ ((__m512i) __builtin_ia32_blendmb_512_mask ((__v64qi) (__A), \
+ (__v64qi) (__W), \
+ (__mmask64) (__U)))
+
+#define _mm512_cmp_epi16_mask(X, Y, P) \
+ ((__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi)(__m512i)(X), \
+ (__v32hi)(__m512i)(Y), (int)(P),\
+ (__mmask32)(-1)))
+
+#define _mm512_cmp_epi8_mask(X, Y, P) \
+ ((__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(P),\
+ (__mmask64)(-1)))
+
+#define _mm512_cmp_epu16_mask(X, Y, P) \
+ ((__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi)(__m512i)(X), \
+ (__v32hi)(__m512i)(Y), (int)(P),\
+ (__mmask32)(-1)))
+
+#define _mm512_cmp_epu8_mask(X, Y, P) \
+ ((__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(P),\
+ (__mmask64)(-1)))
+
+#define _mm512_mask_cmp_epi16_mask(M, X, Y, P) \
+ ((__mmask32) __builtin_ia32_cmpw512_mask ((__v32hi)(__m512i)(X), \
+ (__v32hi)(__m512i)(Y), (int)(P),\
+ (__mmask32)(M)))
+
+#define _mm512_mask_cmp_epi8_mask(M, X, Y, P) \
+ ((__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(P),\
+ (__mmask64)(M)))
+
+#define _mm512_mask_cmp_epu16_mask(M, X, Y, P) \
+ ((__mmask32) __builtin_ia32_ucmpw512_mask ((__v32hi)(__m512i)(X), \
+ (__v32hi)(__m512i)(Y), (int)(P),\
+ (__mmask32)(M)))
+
+#define _mm512_mask_cmp_epu8_mask(M, X, Y, P) \
+ ((__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(P),\
+ (__mmask64)(M)))
+
+#define _mm512_bslli_epi128(A, N) \
+ ((__m512i)__builtin_ia32_pslldq512 ((__m512i)(A), (int)(N) * 8))
+
+#define _mm512_bsrli_epi128(A, N) \
+ ((__m512i)__builtin_ia32_psrldq512 ((__m512i)(A), (int)(N) * 8))
+
+#endif
+
+#ifdef __DISABLE_AVX512BW__
+#undef __DISABLE_AVX512BW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BW__ */
+
+#endif /* _AVX512BWINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
new file mode 100644
index 00000000000..41e8345aacf
--- /dev/null
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -0,0 +1,2308 @@
+/* Copyright (C) 2013
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512DQINTRIN_H_INCLUDED
+#define _AVX512DQINTRIN_H_INCLUDED
+
+#ifndef __AVX512DQ__
+#pragma GCC push_options
+#pragma GCC target("avx512dq")
+#define __DISABLE_AVX512DQ__
+#endif /* __AVX512DQ__ */
+
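+/* Broadcast the 128-bit (or, for the x8 forms, 256-bit) source into
+   every chunk of the 512-bit destination.  */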
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_f64x2 (__m128d __A)
+{
+  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
+                                                           (__v8df)
+                                                           _mm512_setzero_pd (),
+                                                           (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
+{
+  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
+                                                           (__v8df) __O,
+                                                           __M);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
+{
+  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
+                                                           (__v8df)
+                                                           _mm512_setzero_pd (),
+                                                           __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_i64x2 (__m128i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
+                                                           (__v8di)
+                                                           _mm512_setzero_si512 (),
+                                                           (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
+                                                           (__v8di) __O,
+                                                           __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
+                                                           (__v8di)
+                                                           _mm512_setzero_si512 (),
+                                                           __M);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_f32x2 (__m128 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+                                                          (__v16sf)
+                                                          _mm512_setzero_ps (),
+                                                          (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+                                                          (__v16sf) __O,
+                                                          __M);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+                                                          (__v16sf)
+                                                          _mm512_setzero_ps (),
+                                                          __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_i32x2 (__m128i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
+                                                           (__v16si)
+                                                           _mm512_setzero_si512 (),
+                                                           (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
+                                                           (__v16si) __O,
+                                                           __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
+                                                           (__v16si)
+                                                           _mm512_setzero_si512 (),
+                                                           __M);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_f32x8 (__m256 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+                                                          (__v16sf)
+                                                          _mm512_setzero_ps (),
+                                                          (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+                                                          (__v16sf) __O,
+                                                          __M);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+                                                          (__v16sf)
+                                                          _mm512_setzero_ps (),
+                                                          __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_broadcast_i32x8 (__m256i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
+                                                           (__v16si)
+                                                           _mm512_setzero_si512 (),
+                                                           (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
+                                                           (__v16si) __O,
+                                                           __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A)
+{
+  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
+                                                           (__v16si)
+                                                           _mm512_setzero_si512 (),
+                                                           __M);
+}
+
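+/* Multiply 64-bit elements, keeping the low 64 bits of each product
+   (vpmullq).  */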
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mullo_epi64 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
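+/* AVX512DQ bitwise logical operations on packed double- and
+   single-precision values (vxorpd, vorps, vandpd, vandnps, ...),
+   with merge and zero masking.  */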
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_xor_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_xor_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_or_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_or_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_and_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_pd (__m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B)
+{
+ return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_andnot_ps (__m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B)
+{
+ return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B)
+{
+ return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
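+/* vpmovd2m/vpmovq2m set each mask bit from the most significant bit
+   of the corresponding element; vpmovm2d/vpmovm2q expand mask bits
+   to elements of all ones or all zeros.  */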
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movepi32_mask (__m512i __A)
+{
+ return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movepi64_mask (__m512i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movm_epi32 (__mmask16 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_movm_epi64 (__mmask8 __A)
+{
+ return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
+}
+
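+/* Conversions between 64-bit integer and floating-point vectors
+   (vcvttpd2qq, vcvtps2uqq, vcvtqq2ps, ...).  These forms use
+   _MM_FROUND_CUR_DIRECTION, i.e. the rounding mode currently set in
+   MXCSR.  */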
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttpd_epi64 (__m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttpd_epu64 (__m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttps_epi64 (__m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvttps_epu64 (__m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_epi64 (__m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtpd_epu64 (__m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_epi64 (__m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtps_epu64 (__m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_ps (__m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu64_ps (__m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepi64_pd (__m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtepu64_pd (__m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
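+/* As in the other AVX-512 headers, intrinsics taking a compile-time
+   constant are inline functions when optimizing and macros
+   otherwise.  */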
+#ifdef __OPTIMIZE__
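+/* vrangepd/vrangeps: per-element range operation; the low two bits
+   of __C select min, max, absolute min or absolute max, and the
+   next two bits select the sign of the result.  */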
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_range_pd (__m512d __A, __m512d __B, int __C)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_range_pd (__m512d __W, __mmask8 __U,
+ __m512d __A, __m512d __B, int __C)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_range_pd (__mmask8 __U, __m512d __A, __m512d __B, int __C)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_range_ps (__m512 __A, __m512 __B, int __C)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_range_ps (__m512 __W, __mmask16 __U,
+ __m512 __A, __m512 __B, int __C)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_range_ps (__mmask16 __U, __m512 __A, __m512 __B, int __C)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
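+/* vreducesd/vreducess: compute the reduced argument of the low
+   element of __B, i.e. __B minus __B rounded to the precision
+   encoded in __C, copying the upper elements from __A.  */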
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_sd (__m128d __A, __m128d __B, int __C)
+{
+ return (__m128d) __builtin_ia32_reducesd ((__v2df) __A,
+ (__v2df) __B, __C);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_ss (__m128 __A, __m128 __B, int __C)
+{
+ return (__m128) __builtin_ia32_reducess ((__v4sf) __A,
+ (__v4sf) __B, __C);
+}
+
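+/* Scalar forms of vrange: operate on the low elements of __A and
+   __B, copying the remaining upper elements from __A.  The _round
+   variants below take an explicit rounding mode.  */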
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_sd (__m128d __A, __m128d __B, int __C)
+{
+ return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
+ (__v2df) __B, __C,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_ss (__m128 __A, __m128 __B, int __C)
+{
+ return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_round_sd (__m128d __A, __m128d __B, int __C, const int __R)
+{
+ return (__m128d) __builtin_ia32_rangesd128_round ((__v2df) __A,
+ (__v2df) __B, __C,
+ __R);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_round_ss (__m128 __A, __m128 __B, int __C, const int __R)
+{
+ return (__m128) __builtin_ia32_rangess128_round ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ __R);
+}
+
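+/* vfpclasssd/vfpclassss: test the low element against the
+   categories selected by __imm (quiet NaN, positive/negative zero,
+   infinities, denormals, negative, signaling NaN); the mask bit is
+   set if any selected category matches.  */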
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fpclass_ss_mask (__m128 __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclassss ((__v4sf) __A, __imm);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fpclass_sd_mask (__m128d __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
+}
+
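+/* The _round variants of the conversions take an explicit rounding
+   mode __R (an _MM_FROUND_* constant) instead of using the current
+   MXCSR setting.  */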
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundpd_epu64 (__m512d __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundps_epi64 (__m256 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtt_roundps_epu64 (__m256 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_epi64 (__m512d __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundpd_epu64 (__m512d __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_epi64 (__m256 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_epi64 (__mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundps_epu64 (__m256 __A, const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundps_epu64 (__mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi64_ps (__m512i __A, const int __R)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi64_ps (__m256 __W, __mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi64_ps (__mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu64_ps (__m512i __A, const int __R)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu64_ps (__m256 __W, __mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu64_ps (__mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepi64_pd (__m512i __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepi64_pd (__m512d __W, __mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepi64_pd (__mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt_roundepu64_pd (__m512i __A, const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt_roundepu64_pd (__m512d __W, __mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt_roundepu64_pd (__mmask8 __U, __m512i __A,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
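+/* The reduce operations (VREDUCEPD/VREDUCEPS) subtract from each
+   element its value rounded to 2^-M precision, where M is taken from
+   bits 7:4 of __B and the rounding behaviour from its low bits, so
+   the result is the remaining fraction.  For example, with __B == 0
+   (M = 0, round to nearest) an element holding 1.25 reduces to
+   0.25.  */
+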
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_pd (__m512d __A, int __B)
+{
+ return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B)
+{
+ return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
+ (__v8df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B)
+{
+ return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_ps (__m512 __A, int __B)
+{
+ return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B)
+{
+ return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B)
+{
+ return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extractf32x8_ps (__m512 __A, const int __imm)
+{
+ return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extractf32x8_ps (__m256 __W, __mmask8 __U, __m512 __A,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
+ __imm,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extractf32x8_ps (__mmask8 __U, __m512 __A,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_extractf32x8_mask ((__v16sf) __A,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extractf64x2_pd (__m512d __A, const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+							 (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m512d __A,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
+ __imm,
+ (__v2df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extractf64x2_pd (__mmask8 __U, __m512d __A,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extracti32x8_epi32 (__m512i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extracti32x8_epi32 (__m256i __W, __mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
+ __imm,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extracti32x8_epi32 (__mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_extracti32x8_mask ((__v16si) __A,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
+ __imm,
+ (__v2di)
+ _mm_setzero_di (),
+							 (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
+ __imm,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
+ __imm,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8)
+ __U);
+}
+
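+/* For the range operations (VRANGEPD/VRANGEPS), bits 1:0 of __C pick
+   the operation (min, max, absolute min, absolute max) and bits 3:2
+   pick the sign control, per Intel's instruction description; __R
+   supplies the rounding/SAE control as in the other *_round forms.  */
+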
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_range_round_pd (__m512d __A, __m512d __B, int __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_range_round_pd (__m512d __W, __mmask8 __U,
+ __m512d __A, __m512d __B, int __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_range_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_rangepd512_mask ((__v8df) __A,
+ (__v8df) __B, __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_range_round_ps (__m512 __A, __m512 __B, int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_range_round_ps (__m512 __W, __mmask16 __U,
+ __m512 __A, __m512 __B, int __C,
+ const int __R)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_range_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_rangeps512_mask ((__v16sf) __A,
+ (__v16sf) __B, __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_inserti32x8 (__m512i __A, __m256i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_inserti32x8 (__m512i __W, __mmask16 __U, __m512i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_inserti32x8 (__mmask16 __U, __m512i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti32x8_mask ((__v16si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_insertf32x8 (__m512 __A, __m256 __B, const int __imm)
+{
+ return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_insertf32x8 (__m512 __W, __mmask16 __U, __m512 __A,
+ __m256 __B, const int __imm)
+{
+ return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v16sf) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_insertf32x8 (__mmask16 __U, __m512 __A, __m256 __B,
+ const int __imm)
+{
+ return (__m512) __builtin_ia32_insertf32x8_mask ((__v16sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_inserti64x2 (__m512i __A, __m128i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+							(__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_inserti64x2 (__m512i __W, __mmask8 __U, __m512i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v8di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_inserti64x2 (__mmask8 __U, __m512i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_insertf64x2 (__m512d __A, __m128d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+							(__mmask8) -1);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_insertf64x2 (__m512d __W, __mmask8 __U, __m512d __A,
+ __m128d __B, const int __imm)
+{
+ return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v8df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_insertf64x2 (__mmask8 __U, __m512d __A, __m128d __B,
+ const int __imm)
+{
+ return (__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
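+/* The fpclass tests classify each element against the categories
+   selected by __imm; per Intel's VFPCLASS description the bits
+   select, from the LSB up: QNaN, +0, -0, +Inf, -Inf, denormal,
+   finite negative and SNaN.  The result is a mask with one bit per
+   element.  */
+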
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fpclass_pd_mask (__mmask8 __U, __m512d __A,
+ const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fpclass_pd_mask (__m512d __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) __A,
+ __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fpclass_ps_mask (__mmask16 __U, __m512 __A,
+ const int __imm)
+{
+ return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fpclass_ps_mask (__m512 __A, const int __imm)
+{
+ return (__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) __A,
+ __imm,
+						       (__mmask16) -1);
+}
+
+#else
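+/* Macro forms of the intrinsics above, for when the inline versions
+   are not used (the matching conditional is earlier in this header,
+   outside this hunk); the macros keep the immediate and rounding-mode
+   arguments as integer constant expressions for the builtins.  */
+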
+#define _mm_range_sd(A, B, C) \
+ ((__m128d) __builtin_ia32_rangesd128_round ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_range_ss(A, B, C) \
+ ((__m128) __builtin_ia32_rangess128_round ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm_range_round_sd(A, B, C, R) \
+ ((__m128d) __builtin_ia32_rangesd128_round ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), (R)))
+
+#define _mm_range_round_ss(A, B, C, R) \
+ ((__m128) __builtin_ia32_rangess128_round ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), (R)))
+
+#define _mm512_cvtt_roundpd_epi64(A, B) \
+  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B) \
+  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(A), (__v8di)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvtt_roundpd_epi64(U, A, B) \
+  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)(U), (B)))
+
+#define _mm512_cvtt_roundpd_epu64(A, B) \
+  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B) \
+  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(A), (__v8di)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvtt_roundpd_epu64(U, A, B) \
+  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)(U), (B)))
+
+#define _mm512_cvtt_roundps_epi64(A, B) \
+  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvtt_roundps_epi64(W, U, A, B) \
+  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(A), (__v8di)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvtt_roundps_epi64(U, A, B) \
+  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)(U), (B)))
+
+#define _mm512_cvtt_roundps_epu64(A, B) \
+  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvtt_roundps_epu64(W, U, A, B) \
+  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(A), (__v8di)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvtt_roundps_epu64(U, A, B) \
+  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)(U), (B)))
+
+#define _mm512_cvt_roundpd_epi64(A, B) \
+  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvt_roundpd_epi64(W, U, A, B) \
+  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(A), (__v8di)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvt_roundpd_epi64(U, A, B) \
+  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)(U), (B)))
+
+#define _mm512_cvt_roundpd_epu64(A, B) \
+  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvt_roundpd_epu64(W, U, A, B) \
+  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(A), (__v8di)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvt_roundpd_epu64(U, A, B) \
+  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)(U), (B)))
+
+#define _mm512_cvt_roundps_epi64(A, B) \
+  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvt_roundps_epi64(W, U, A, B) \
+  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(A), (__v8di)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvt_roundps_epi64(U, A, B) \
+  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)(U), (B)))
+
+#define _mm512_cvt_roundps_epu64(A, B) \
+  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvt_roundps_epu64(W, U, A, B) \
+  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(A), (__v8di)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvt_roundps_epu64(U, A, B) \
+  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(A), (__v8di)_mm512_setzero_si512 (), (__mmask8)(U), (B)))
+
+#define _mm512_cvt_roundepi64_ps(A, B) \
+  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvt_roundepi64_ps(W, U, A, B) \
+  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvt_roundepi64_ps(U, A, B) \
+  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (B)))
+
+#define _mm512_cvt_roundepu64_ps(A, B) \
+  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvt_roundepu64_ps(W, U, A, B) \
+  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvt_roundepu64_ps(U, A, B) \
+  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (B)))
+
+#define _mm512_cvt_roundepi64_pd(A, B) \
+  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvt_roundepi64_pd(W, U, A, B) \
+  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvt_roundepi64_pd(U, A, B) \
+  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (B)))
+
+#define _mm512_cvt_roundepu64_pd(A, B) \
+  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (B)))
+
+#define _mm512_mask_cvt_roundepu64_pd(W, U, A, B) \
+  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)(W), (__mmask8)(U), (B)))
+
+#define _mm512_maskz_cvt_roundepu64_pd(U, A, B) \
+  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (B)))
+
+#define _mm512_reduce_pd(A, B) \
+ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
+ (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)-1))
+
+#define _mm512_mask_reduce_pd(W, U, A, B) \
+ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
+ (int)(B), (__v8df)(__m512d)(W), (__mmask8)(U)))
+
+#define _mm512_maskz_reduce_pd(U, A, B) \
+ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \
+ (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)(U)))
+
+#define _mm512_reduce_ps(A, B) \
+ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
+ (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)-1))
+
+#define _mm512_mask_reduce_ps(W, U, A, B) \
+ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
+ (int)(B), (__v16sf)(__m512)(W), (__mmask16)(U)))
+
+#define _mm512_maskz_reduce_ps(U, A, B) \
+ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \
+ (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U)))
+
+#define _mm512_extractf32x8_ps(X, C) \
+ ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
+ (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8)-1))
+
+#define _mm512_mask_extractf32x8_ps(W, U, X, C) \
+ ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
+ (int) (C), (__v8sf)(__m256) (W), (__mmask8) (U)))
+
+#define _mm512_maskz_extractf32x8_ps(U, X, C) \
+ ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \
+ (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8) (U)))
+
+#define _mm512_extractf64x2_pd(X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))
+
+#define _mm512_mask_extractf64x2_pd(W, U, X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))
+
+#define _mm512_maskz_extractf64x2_pd(U, X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
+
+#define _mm512_extracti32x8_epi32(X, C) \
+ ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
+ (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8)-1))
+
+#define _mm512_mask_extracti32x8_epi32(W, U, X, C) \
+ ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
+ (int) (C), (__v8si)(__m256i) (W), (__mmask8) (U)))
+
+#define _mm512_maskz_extracti32x8_epi32(U, X, C) \
+ ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X), \
+ (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8) (U)))
+
+#define _mm512_extracti64x2_epi64(X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))
+
+#define _mm512_mask_extracti64x2_epi64(W, U, X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))
+
+#define _mm512_maskz_extracti64x2_epi64(U, X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))
+
+#define _mm512_range_pd(A, B, C) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_range_pd(W, U, A, B, C) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_range_pd(U, A, B, C) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_range_ps(A, B, C) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_range_ps(W, U, A, B, C) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_range_ps(U, A, B, C) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_range_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (R)))
+
+#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)(__m512d)(W), (__mmask8)(U), (R)))
+
+#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), (int)(C), \
+ (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (R)))
+
+#define _mm512_range_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (R)))
+
+#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)(__m512)(W), (__mmask16)(U), (R)))
+
+#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
+ ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), (int)(C), \
+ (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (R)))
+
+#define _mm512_insertf64x2(X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (X), \
+ (__mmask8)-1))
+
+#define _mm512_mask_insertf64x2(W, U, X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), (__v8df)(__m512d) (W), \
+ (__mmask8) (U)))
+
+#define _mm512_maskz_insertf64x2(U, X, Y, C) \
+ ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), \
+ (__v8df)(__m512d) _mm512_setzero_pd(), (__mmask8) (U)))
+
+#define _mm512_inserti64x2(X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (X), (__mmask8)-1))
+
+#define _mm512_mask_inserti64x2(W, U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), (__v8di)(__m512i) (W), \
+ (__mmask8) (U)))
+
+#define _mm512_maskz_inserti64x2(U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), \
+ (__v8di)(__m512i) _mm512_setzero_si512 (), (__mmask8) (U)))
+
+#define _mm512_insertf32x8(X, Y, C) \
+ ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
+ (__v8sf)(__m256) (Y), (int) (C),\
+ (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__mmask16)-1))
+
+#define _mm512_mask_insertf32x8(W, U, X, Y, C) \
+ ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
+ (__v8sf)(__m256) (Y), (int) (C),\
+ (__v16sf)(__m512)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_insertf32x8(U, X, Y, C) \
+ ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X), \
+ (__v8sf)(__m256) (Y), (int) (C),\
+ (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__mmask16)(U)))
+
+#define _mm512_inserti32x8(X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
+ (__v8si)(__m256i) (Y), (int) (C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)-1))
+
+#define _mm512_mask_inserti32x8(W, U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
+ (__v8si)(__m256i) (Y), (int) (C),\
+ (__v16si)(__m512i)(W),\
+ (__mmask16)(U)))
+
+#define _mm512_maskz_inserti32x8(U, X, Y, C) \
+ ((__m512i) __builtin_ia32_inserti32x8_mask ((__v16si)(__m512i) (X), \
+ (__v8si)(__m256i) (Y), (int) (C),\
+ (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__mmask16)(U)))
+
+#define _mm_fpclass_ss_mask(X, C) \
+  ((__mmask8) __builtin_ia32_fpclassss ((__v4sf) (__m128) (X), (int) (C)))
+
+#define _mm_fpclass_sd_mask(X, C) \
+  ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C)))
+
+#define _mm512_mask_fpclass_pd_mask(u, X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
+ (int) (C), (__mmask8)(u)))
+
+#define _mm512_mask_fpclass_ps_mask(u, x, c) \
+  ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
+						 (int) (c), (__mmask16) (u)))
+
+#define _mm512_fpclass_pd_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
+ (int) (C), (__mmask8)-1))
+
+#define _mm512_fpclass_ps_mask(x, c) \
+  ((__mmask16) __builtin_ia32_fpclassps512_mask ((__v16sf) (__m512) (x),\
+						 (int) (c), (__mmask16)-1))
+
+#define _mm_reduce_sd(A, B, C) \
+  ((__m128d) __builtin_ia32_reducesd ((__v2df)(__m128d)(A), \
+				      (__v2df)(__m128d)(B), (int)(C)))
+
+#define _mm_reduce_ss(A, B, C) \
+  ((__m128) __builtin_ia32_reducess ((__v4sf)(__m128)(A), \
+				     (__v4sf)(__m128)(B), (int)(C)))
+
+#endif
+
+#ifdef __DISABLE_AVX512DQ__
+#undef __DISABLE_AVX512DQ__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512DQ__ */
+
+#endif /* _AVX512DQINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
new file mode 100644
index 00000000000..c704550c7fd
--- /dev/null
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -0,0 +1,4218 @@
+/* Copyright (C) 2014
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VLBWINTRIN_H_INCLUDED
+#define _AVX512VLBWINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VL__) || !defined(__AVX512BW__)
+#pragma GCC push_options
+#pragma GCC target("avx512vl,avx512bw")
+#define __DISABLE_AVX512VLBW__
+#endif /* __AVX512VL__ && __AVX512BW__ */
+
+
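+/* This header provides the 128-bit and 256-bit (AVX512VL)
+   counterparts of the AVX512BW byte/word operations; the patterns
+   mirror the 512-bit forms with correspondingly narrower vectors and
+   masks (__mmask16/__mmask32 for bytes, __mmask8/__mmask16 for
+   words).  */
+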
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdquqi256_mask ((__v32qi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
+ (__v16qi)
+ _mm_setzero_hi (),
+ (__mmask16) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
+{
+ __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
+ (__v32qi) __A,
+ (__mmask32) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
+{
+ __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
+ (__v16qi) __A,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdquhi256_mask ((__v16hi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+ (__v16qi)
+ _mm_setzero_hi (),
+ (__mmask16) __U);
+}
+
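+/* The epi16->epi8 down-conversions come in three variants: the plain
+   pmovwb forms truncate, the pmovswb forms saturate as signed and the
+   pmovuswb forms saturate as unsigned.  For a word holding 300
+   (0x012c), for example, truncation gives 0x2c (44), signed
+   saturation gives 127 and unsigned saturation gives 255.  */
+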
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi16_epi8 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
+						  (__v16qi)
+						  _mm_setzero_si128 (),
+						  (__mmask16) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi16_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
+						   (__v16qi)
+						   _mm_setzero_si128 (),
+						   (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi16_epi8 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
+						   (__v16qi)
+						   _mm_setzero_si128 (),
+						   (__mmask16) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi16_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
+						    (__v16qi)
+						    _mm_setzero_si128 (),
+						    (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi16_epi8 (__m256i __A)
+{
+  return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
+						    (__v16qi)
+						    _mm_setzero_si128 (),
+						    (__mmask16) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
+ (__v32qi) __O,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
+ (__v32qi) __O,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastb256_gpr_mask (__A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastb128_gpr_mask (__A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
+ (__v16hi) __O,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
+ (__v16hi) __O,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
+ (__v8hi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
+ (__v8hi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
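+/* The permutexvar operations (VPERMW) select each destination word by
+   a per-element index taken from the other source operand; unlike the
+   legacy shuffles, the indices may cross 128-bit lane boundaries.  */
+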
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutexvar_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
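+/* The permutex2var operations (VPERMT2W/VPERMI2W) index into the
+   concatenation of the two data operands, so each index selects one
+   of 2*N words; the _mask form keeps the first data operand in
+   masked-off lanes, the mask2 form keeps the index operand instead,
+   and the _maskz form zeroes them.  */
+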
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_epi16 (__m256i __A, __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
+ /* idx */ ,
+ (__v16hi) __A,
+ (__v16hi) __B,
+						      (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex2var_epi16 (__m256i __A, __mmask16 __U,
+ __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varhi256_mask ((__v16hi) __I
+ /* idx */ ,
+ (__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask2_permutex2var_epi16 (__m256i __A, __m256i __I,
+ __mmask16 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
+ (__v16hi) __I
+ /* idx */ ,
+ (__v16hi) __B,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A,
+ __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varhi256_maskz ((__v16hi) __I
+ /* idx */ ,
+ (__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_epi16 (__m128i __A, __m128i __I, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
+ /* idx */ ,
+ (__v8hi) __A,
+ (__v8hi) __B,
+						     (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutex2var_epi16 (__m128i __A, __mmask8 __U, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varhi128_mask ((__v8hi) __I
+ /* idx */ ,
+ (__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask2_permutex2var_epi16 (__m128i __A, __m128i __I, __mmask8 __U,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
+ (__v8hi) __I
+ /* idx */ ,
+ (__v8hi) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varhi128_maskz ((__v8hi) __I
+ /* idx */ ,
+ (__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8)
+ __U);
+}
+
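+/* The multiply-add operations below follow the SSE semantics:
+   maddubs (VPMADDUBSW) multiplies unsigned bytes from the first
+   operand by signed bytes from the second and adds adjacent pairs
+   into saturated signed words, while madd (VPMADDWD) multiplies
+   signed words and adds adjacent pairs into signed doublewords.  */
+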
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_maddubs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
+ (__v32qi) __Y,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_maddubs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmaddubsw256_mask ((__v32qi) __X,
+ (__v32qi) __Y,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_maddubs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
+ (__v16qi) __Y,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
+ (__v16qi) __Y,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_madd_epi16 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_madd_epi16 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaddwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_madd_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_madd_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaddwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
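+/* The movepi*_mask operations (VPMOVB2M/VPMOVW2M) collect the sign
+   bit of every element into a mask register, and the movm forms
+   (VPMOVM2B/VPMOVM2W) expand a mask back into a vector of all-ones or
+   all-zeros elements.  */
+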
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi8_mask (__m128i __A)
+{
+ return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movepi8_mask (__m256i __A)
+{
+ return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi16_mask (__m128i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movepi16_mask (__m256i __A)
+{
+ return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movm_epi8 (__mmask16 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movm_epi8 (__mmask32 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movm_epi16 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movm_epi16 (__mmask16 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
+}
+
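+/* The test operations (VPTESTMB/VPTESTMW) AND the two sources and set
+   a mask bit for each element whose result is non-zero, optionally
+   filtered by an input mask in the _mask_test forms.  */
+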
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_epi8_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmb128 ((__v16qi) __A,
+ (__v16qi) __B, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_test_epi8_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestmb256 ((__v32qi) __A,
+ (__v32qi) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_epi16_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmw128 ((__v8hi) __A,
+ (__v8hi) __B, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_test_epi16_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestmw256 ((__v16hi) __A,
+ (__v16hi) __B, __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_di (),
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_di (),
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_di (),
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epu8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminub256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_di (),
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epu8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_di (),
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epi16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_di (),
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epu16 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epu16 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_di (),
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epu16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_di (),
+ (__mmask8) __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __M);
+}
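+
+/* All of the mask/maskz min/max variants above follow the same pattern:
+   element i of the result is min/max (__A[i], __B[i]) when bit i of __M
+   is set, and __W[i] (mask form) or zero (maskz form) otherwise.
+   Illustrative sketch (operands a and b are hypothetical):
+
+     __m128i r = _mm_maskz_min_epu16 ((__mmask8) 0x0f, a, b);
+
+   Elements 0..3 of r hold the unsigned minima; elements 4..7 are zero.  */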
+
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_alignr_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B, const int __N)
+{
+ return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
+ (__v4di) __B,
+ __N * 8,
+ (__v4di) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_alignr_epi8 (__mmask32 __U, __m256i __A, __m256i __B,
+ const int __N)
+{
+ return (__m256i) __builtin_ia32_palignr256_mask ((__v4di) __A,
+ (__v4di) __B,
+ __N * 8,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_alignr_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B, const int __N)
+{
+ return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
+ (__v2di) __B,
+ __N * 8,
+ (__v2di) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_alignr_epi8 (__mmask16 __U, __m128i __A, __m128i __B,
+ const int __N)
+{
+ return (__m128i) __builtin_ia32_palignr128_mask ((__v2di) __A,
+ (__v2di) __B,
+ __N * 8,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
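+
+/* As in _mm_alignr_epi8, __N is a byte offset; the palignr builtins take
+   a bit count, hence the __N * 8 scaling above.  */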
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dbsad_epu8 (__m256i __A, __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_dbsad_epu8 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ __imm,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_dbsad_epu8 (__mmask16 __U, __m256i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dbsad_epu8 (__m128i __A, __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ __imm,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_dbsad_epu8 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ __imm,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ __imm,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
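+
+/* The dbsad intrinsics wrap VDBPSADBW, which sums absolute differences of
+   unsigned bytes over sliding four-byte blocks after selecting dwords of
+   __B with __imm; see the Intel SDM for the exact block layout.  */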
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+ return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
+{
+ return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
+{
+ return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
+{
+ return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
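+
+/* Blend semantics: element i of the result is __W[i] when bit i of __U is
+   set and __A[i] otherwise, so e.g. _mm_mask_blend_epi16 (0xff, a, w)
+   simply returns w.  */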
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epi16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epi16_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epi16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, __P,
+ (__mmask16) __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epi16_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, __P,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epi8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, __P,
+ (__mmask16) __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epi8_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, __P,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epi8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, __P,
+ (__mmask32) __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, __P,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epu16_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epu16_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+ (__v8hi) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epu16_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, __P,
+ (__mmask16) __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epu16_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+ (__v16hi) __Y, __P,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epu8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, __P,
+ (__mmask16) __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epu8_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+ (__v16qi) __Y, __P,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epu8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, __P,
+ (__mmask32) __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+ (__v32qi) __Y, __P,
+ (__mmask32) -1);
+}
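+
+/* __P selects the comparison predicate: 0 (EQ), 1 (LT), 2 (LE), 4 (NE),
+   5 (NLT, i.e. GE) or 6 (NLE, i.e. GT); 3 and 7 force false/true.  The
+   epi variants compare signed elements, the epu variants unsigned; e.g.
+   _mm_cmp_epi16_mask (x, y, 1) sets bit i iff x[i] < y[i] as signed
+   shorts.  */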
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srli_epi16 (__mmask16 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi) __A, __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srli_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi) __A, __imm,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shufflehi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
+ __imm,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shufflehi_epi16 (__mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi) __A,
+ __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shufflehi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shufflehi_epi16 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shufflelo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
+ __imm,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shufflelo_epi16 (__mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi) __A,
+ __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shufflelo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shufflelo_epi16 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srai_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srai_epi16 (__mmask16 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srai_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srai_epi16 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_slli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ int __B)
+{
+ return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_slli_epi16 (__mmask16 __U, __m256i __A, int __B)
+{
+ return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_slli_epi16 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
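+
+/* For the immediate shifts above, a count greater than 15 yields all-zero
+   elements for srli/slli and replicates the sign bit for srai.  */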
+
+#else
+#define _mm256_mask_alignr_epi8(W, U, X, Y, N) \
+ ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)((N) * 8), \
+ (__v4di)(__m256i)(W), (__mmask32)(U)))
+
+#define _mm256_mask_srli_epi16(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A), \
+ (int)(B), (__v16hi)(__m256i)(W), (__mmask16)(U)))
+
+#define _mm256_maskz_srli_epi16(U, A, B) \
+ ((__m256i) __builtin_ia32_psrlwi256_mask ((__v16hi)(__m256i)(A), \
+ (int)(B), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(U)))
+
+#define _mm_mask_srli_epi16(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A), \
+ (int)(B), (__v8hi)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srli_epi16(U, A, B) \
+ ((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A), \
+ (int)(B), (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))
+
+#define _mm256_mask_srai_epi16(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A), \
+ (int)(B), (__v16hi)(__m256i)(W), (__mmask16)(U)))
+
+#define _mm256_maskz_srai_epi16(U, A, B) \
+ ((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A), \
+ (int)(B), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(U)))
+
+#define _mm_mask_srai_epi16(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A), \
+ (int)(B), (__v8hi)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srai_epi16(U, A, B) \
+ ((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A), \
+ (int)(B), (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))
+
+#define _mm256_mask_shufflehi_epi16(W, U, A, B) \
+ ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), (int)(B), \
+ (__v16hi)(__m256i)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_shufflehi_epi16(U, A, B) \
+ ((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), (int)(B), \
+ (__v16hi)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask16)(U)))
+
+#define _mm_mask_shufflehi_epi16(W, U, A, B) \
+ ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), (int)(B), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_shufflehi_epi16(U, A, B) \
+ ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), (int)(B), \
+ (__v8hi)(__m128i)_mm_setzero_hi(), \
+ (__mmask8)(U)))
+
+#define _mm256_mask_shufflelo_epi16(W, U, A, B) \
+ ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A), (int)(B), \
+ (__v16hi)(__m256i)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_shufflelo_epi16(U, A, B) \
+ ((__m256i) __builtin_ia32_pshuflw256_mask ((__v16hi)(__m256i)(A), (int)(B), \
+ (__v16hi)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask16)(U)))
+
+#define _mm_mask_shufflelo_epi16(W, U, A, B) \
+ ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), (int)(B), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_shufflelo_epi16(U, A, B) \
+ ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), (int)(B), \
+ (__v8hi)(__m128i)_mm_setzero_hi(), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_alignr_epi8(U, X, Y, N) \
+ ((__m256i) __builtin_ia32_palignr256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)((N) * 8), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask32)(U)))
+
+#define _mm_mask_alignr_epi8(W, U, X, Y, N) \
+ ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)((N) * 8), \
+ (__v2di)(__m128i)(W), (__mmask16)(U)))
+
+#define _mm_maskz_alignr_epi8(U, X, Y, N) \
+ ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)((N) * 8), \
+ (__v2di)(__m128i)_mm_setzero_si128 (), \
+ (__mmask16)(U)))
+
+#define _mm_mask_slli_epi16(W, U, X, C) \
+ ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\
+ (__v8hi)(__m128i)(W),\
+ (__mmask8)(U)))
+
+#define _mm_maskz_slli_epi16(U, X, C) \
+ ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\
+ (__v8hi)(__m128i)_mm_setzero_hi(),\
+ (__mmask8)(U)))
+
+#define _mm256_dbsad_epu8(X, Y, C) \
+ ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X), \
+ (__v32qi)(__m256i) (Y), (int) (C), \
+ (__v16hi)(__m256i)_mm256_setzero_si256(),\
+ (__mmask16)-1))
+
+#define _mm256_mask_slli_epi16(W, U, X, C) \
+ ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), (int)(C),\
+ (__v16hi)(__m256i)(W),\
+ (__mmask16)(U)))
+
+#define _mm256_maskz_slli_epi16(U, X, C) \
+ ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), (int)(C),\
+ (__v16hi)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask16)(U)))
+
+#define _mm256_mask_dbsad_epu8(W, U, X, Y, C) \
+ ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X), \
+ (__v32qi)(__m256i) (Y), (int) (C), \
+ (__v16hi)(__m256i)(W), \
+ (__mmask16)(U)))
+
+#define _mm256_maskz_dbsad_epu8(U, X, Y, C) \
+ ((__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)(__m256i) (X), \
+ (__v32qi)(__m256i) (Y), (int) (C), \
+ (__v16hi)(__m256i)_mm256_setzero_si256(),\
+ (__mmask16)(U)))
+
+#define _mm_dbsad_epu8(X, Y, C) \
+ ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X), \
+ (__v16qi)(__m128i) (Y), (int) (C), \
+ (__v8hi)(__m128i)_mm_setzero_si128(), \
+ (__mmask8)-1))
+
+#define _mm_mask_dbsad_epu8(W, U, X, Y, C) \
+ ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X), \
+ (__v16qi)(__m128i) (Y), (int) (C), \
+ (__v8hi)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_dbsad_epu8(U, X, Y, C) \
+ ((__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)(__m128i) (X), \
+ (__v16qi)(__m128i) (Y), (int) (C), \
+ (__v8hi)(__m128i)_mm_setzero_si128(), \
+ (__mmask8)(U)))
+
+#define _mm_mask_blend_epi16(__U, __A, __W) \
+ ((__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) (__A), \
+ (__v8hi) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm_mask_blend_epi8(__U, __A, __W) \
+ ((__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) (__A), \
+ (__v16qi) (__W), \
+ (__mmask16) (__U)))
+
+#define _mm256_mask_blend_epi16(__U, __A, __W) \
+ ((__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) (__A), \
+ (__v16hi) (__W), \
+ (__mmask16) (__U)))
+
+#define _mm256_mask_blend_epi8(__U, __A, __W) \
+ ((__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) (__A), \
+ (__v32qi) (__W), \
+ (__mmask32) (__U)))
+
+#define _mm_cmp_epi16_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(P),\
+ (__mmask8)(-1)))
+
+#define _mm_cmp_epi8_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(P),\
+ (__mmask16)(-1)))
+
+#define _mm256_cmp_epi16_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi)(__m256i)(X), \
+ (__v16hi)(__m256i)(Y), (int)(P),\
+ (__mmask16)(-1)))
+
+#define _mm256_cmp_epi8_mask(X, Y, P) \
+ ((__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(P),\
+ (__mmask32)(-1)))
+
+#define _mm_cmp_epu16_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(P),\
+ (__mmask8)(-1)))
+
+#define _mm_cmp_epu8_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(P),\
+ (__mmask16)(-1)))
+
+#define _mm256_cmp_epu16_mask(X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi)(__m256i)(X), \
+ (__v16hi)(__m256i)(Y), (int)(P),\
+ (__mmask16)(-1)))
+
+#define _mm256_cmp_epu8_mask(X, Y, P) \
+ ((__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(P),\
+ (__mmask32)(-1)))
+
+#define _mm_mask_cmp_epi16_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_epi8_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(P),\
+ (__mmask16)(M)))
+
+#define _mm256_mask_cmp_epi16_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi)(__m256i)(X), \
+ (__v16hi)(__m256i)(Y), (int)(P),\
+ (__mmask16)(M)))
+
+#define _mm256_mask_cmp_epi8_mask(M, X, Y, P) \
+ ((__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(P),\
+ (__mmask32)(M)))
+
+#define _mm_mask_cmp_epu16_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_epu8_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(P),\
+ (__mmask16)(M)))
+
+#define _mm256_mask_cmp_epu16_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi)(__m256i)(X), \
+ (__v16hi)(__m256i)(Y), (int)(P),\
+ (__mmask16)(M)))
+
+#define _mm256_mask_cmp_epu8_mask(M, X, Y, P) \
+ ((__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi)(__m256i)(X), \
+ (__v32qi)(__m256i)(Y), (int)(P),\
+ (__mmask32)(M)))
+#endif
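+
+/* The cmpneq/cmplt/cmpge/cmple wrappers below hard-code the predicate
+   values 4, 1, 5 and 2 from the encoding described above.  */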
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epi8_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+                                                  (__v32qi) __Y, 4,
+                                                  (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epi8_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+                                                  (__v32qi) __Y, 1,
+                                                  (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epi8_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+                                                  (__v32qi) __Y, 5,
+                                                  (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epi8_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+                                                  (__v32qi) __Y, 2,
+                                                  (__mmask32) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epi16_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+                                                  (__v16hi) __Y, 4,
+                                                  (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epi16_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+                                                  (__v16hi) __Y, 1,
+                                                  (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epi16_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+                                                  (__v16hi) __Y, 5,
+                                                  (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epi16_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+                                                  (__v16hi) __Y, 2,
+                                                  (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epu8_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+                                                   (__v16qi) __Y, 4,
+                                                   (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epu8_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+                                                   (__v16qi) __Y, 1,
+                                                   (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epu8_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+                                                   (__v16qi) __Y, 5,
+                                                   (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epu8_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
+                                                   (__v16qi) __Y, 2,
+                                                   (__mmask16) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epu16_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+                                                  (__v8hi) __Y, 4,
+                                                  (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epu16_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+                                                  (__v8hi) __Y, 1,
+                                                  (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epu16_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+                                                  (__v8hi) __Y, 5,
+                                                  (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epu16_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask8) __builtin_ia32_ucmpw128_mask ((__v8hi) __X,
+                                                  (__v8hi) __Y, 2,
+                                                  (__mmask8) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epi8_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+                                                  (__v16qi) __Y, 4,
+                                                  (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi8_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+                                                  (__v16qi) __Y, 1,
+                                                  (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epi8_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+                                                  (__v16qi) __Y, 5,
+                                                  (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epi8_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
+                                                  (__v16qi) __Y, 2,
+                                                  (__mmask16) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epi16_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+                                                 (__v8hi) __Y, 4,
+                                                 (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi16_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+                                                 (__v8hi) __Y, 1,
+                                                 (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epi16_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+                                                 (__v8hi) __Y, 5,
+                                                 (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epi16_mask (__m128i __X, __m128i __Y)
+{
+  return (__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi) __X,
+                                                 (__v8hi) __Y, 2,
+                                                 (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mulhrs_epi16 (__m256i __W, __mmask16 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
+ (__v16hi) __Y,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mulhrs_epi16 (__mmask16 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmulhrsw256_mask ((__v16hi) __X,
+ (__v16hi) __Y,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mulhi_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mulhi_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulhuw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mulhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mulhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulhw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mulhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mulhi_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
+ (__v8hi) __Y,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
+ (__v8hi) __Y,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mullo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mullo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmullw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mullo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
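+
+/* Multiply semantics: mulhi returns the high 16 bits of the 32-bit
+   product (signed for epi, unsigned for epu), mullo the low 16 bits, and
+   mulhrs the rounded high part (a * b + 0x4000) >> 15 of the signed
+   product, as in PMULHRSW.  */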
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu8_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
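+
+/* cvtepi8_epi16 sign-extends each byte to 16 bits, cvtepu8_epi16
+   zero-extends it: the byte 0x80 widens to 0xff80 and 0x0080
+   respectively.  */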
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_avg_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_avg_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_avg_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_avg_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_avg_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_avg_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pavgw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_avg_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_avg_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pavgw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
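+
+/* avg_epu8/avg_epu16 compute the rounded unsigned average
+   (a + b + 1) >> 1 in widened arithmetic, so the sum cannot overflow,
+   as in PAVGB/PAVGW.  */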
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_adds_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_adds_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_adds_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_adds_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_adds_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_adds_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_adds_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_adds_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_subs_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_subs_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_subs_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_subs_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubsw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_subs_epu8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_subs_epu8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_subs_epu16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_subs_epu16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubusw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
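+
+/* The adds/subs forms saturate instead of wrapping: for epi8,
+   100 + 100 saturates to 127, and for epu8, 1 - 2 saturates to 0.  */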
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpcklbw256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpcklbw128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpcklwd256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpcklwd128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
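+/* The comparisons below return a mask register rather than a vector:
+   bit i of the result is set iff the relation holds for element i.
+   The _mask_ forms additionally AND the result with the incoming
+   mask __U.  */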
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi8_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpeq_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpeqb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi8_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpeq_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpeqb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi16_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpeq_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqw128_mask ((__v8hi) __A,
+ (__v8hi) __B, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi16_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpeq_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpeqw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi8_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpgt_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpgtb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi8_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpgt_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_pcmpgtb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi16_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpgt_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtw128_mask ((__v8hi) __A,
+ (__v8hi) __B, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi16_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpgt_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_pcmpgtw256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ __U);
+}
+
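+/* testn (VPTESTNMB/VPTESTNMW) sets bit i of the result iff
+   (__A[i] & __B[i]) == 0; e.g. _mm_testn_epi8_mask (x, x) yields a
+   mask of the zero bytes of x.  */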
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testn_epi8_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmb128 ((__v16qi) __A,
+ (__v16qi) __B, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testn_epi8_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_ptestnmb256 ((__v32qi) __A,
+ (__v32qi) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testn_epi16_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmw128 ((__v8hi) __A,
+ (__v8hi) __B, __U);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testn_epi16_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask16) __builtin_ia32_ptestnmw256 ((__v16hi) __A,
+ (__v16hi) __B, __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_epi8 (__m256i __W, __mmask32 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_epi8 (__mmask32 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pshufb256_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shuffle_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shuffle_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshufb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
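+/* packs/packus narrow each word to a byte with saturation: packs
+   clamps to the signed range [-128, 127] and packus to the unsigned
+   range [0, 255] (a source word of 300 becomes 127 resp. 255; -5
+   becomes -5 resp. 0).  Each 128-bit lane is packed independently.  */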
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_packs_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_packs_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packsswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi) __W,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_packs_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_packs_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packsswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi) __W,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_packus_epi16 (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_packus_epi16 (__m256i __W, __mmask32 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packuswb256_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v32qi) __W,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_packus_epi16 (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_packus_epi16 (__m128i __W, __mmask16 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packuswb128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v16qi) __W,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_abs_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
+ (__v32qi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsb256_mask ((__v32qi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_abs_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_abs_epi8 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsb128_mask ((__v16qi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_abs_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_abs_epi16 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsw256_mask ((__v16hi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_abs_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_abs_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsw128_mask ((__v8hi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
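+/* The unsigned comparisons below pass an immediate predicate to
+   __builtin_ia32_ucmp{b,w}256_mask using the standard AVX-512
+   encoding: 0 = EQ, 1 = LT, 2 = LE, 4 = NEQ, 5 = GE, 6 = GT.  */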
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epu8_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 4,
+						   (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epu8_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 1,
+						   (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epu8_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 5,
+						   (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epu8_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 2,
+						   (__mmask32) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epu16_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 4,
+						   (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epu16_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 1,
+						   (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epu16_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 5,
+						   (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epu16_mask (__m256i __X, __m256i __Y)
+{
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 2,
+						   (__mmask16) -1);
+}
+
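+/* The masked stores write only the elements selected by __U and do
+   not fault on masked-off elements (AVX-512 fault suppression), so
+   they are safe at the edge of an accessible region.  */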
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
+{
+ __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
+ (__v16hi) __A,
+ (__mmask16) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
+ (__v8hi) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_adds_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_subs_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_subs_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_subs_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_subs_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_subs_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_subs_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_subs_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_subs_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srl_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srl_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srl_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sra_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sra_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sra_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sra_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_adds_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_adds_epu8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_adds_epu8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_adds_epu16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_adds_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddusw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_adds_epi8 (__m128i __W, __mmask16 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_adds_epi8 (__mmask16 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddsb128_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi8 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
+						  (__v16qi)
+						  _mm_setzero_si128 (),
+						  (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
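+/* The sllv/srlv/srav intrinsics shift each word element by the count
+   held in the corresponding element of __B.  Counts above 15 produce
+   zero for the logical shifts and a sign fill for the arithmetic
+   shift.  */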
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srav_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srav_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srav_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrav16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srav_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srav_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srav_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srlv_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srlv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srlv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psrlv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srlv_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srlv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srlv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sllv_epi16 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sllv_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sllv_epi16 (__mmask16 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psllv16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sllv_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sllv_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sllv_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sll_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllw128_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sll_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sll_epi16 (__mmask16 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllw256_mask ((__v16hi) __A,
+ (__v8hi) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_packus_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_packus_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packusdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi) __W,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_packus_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_packus_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packusdw128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v8hi) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_packs_epi32 (__mmask16 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_packs_epi32 (__m256i __W, __mmask16 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_packssdw256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v16hi) __W,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_packs_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_packs_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_packssdw128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v8hi) __W, __M);
+}
+
+#ifdef __DISABLE_AVX512VLBW__
+#undef __DISABLE_AVX512VLBW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VLBW__ */
+
+#endif /* _AVX512VLBWINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h
new file mode 100644
index 00000000000..bff3ead8637
--- /dev/null
+++ b/gcc/config/i386/avx512vldqintrin.h
@@ -0,0 +1,2035 @@
+/* Copyright (C) 2014
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VLDQINTRIN_H_INCLUDED
+#define _AVX512VLDQINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
+#pragma GCC push_options
+#pragma GCC target("avx512vl,avx512dq")
+#define __DISABLE_AVX512VLDQ__
+#endif /* __AVX512VL__ && __AVX512DQ__ */
+
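+/* cvtt ("convert with truncation") rounds toward zero, whereas the
+   plain cvt forms further below round according to the current MXCSR
+   rounding mode.  Out-of-range inputs of the signed conversions yield
+   the integer indefinite value 0x8000000000000000.  */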
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttpd_epi64 (__m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_epi64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttpd_epu64 (__m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_epu64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_epi64 (__m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_epi64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_epu64 (__m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A)
+{
+ return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_epu64 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttps_epi64 (__m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_epi64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttps_epu64 (__m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_epu64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
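+/* broadcast_{f,i}64x2 replicate the 128-bit source into both lanes of
+   the 256-bit destination; broadcast_{f,i}32x2 replicate the low
+   64 bits (two 32-bit elements) across the destination.  */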
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_f64x2 (__m128d __A)
+{
+  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
+							    __A,
+							    (__v4df)
+							    _mm256_setzero_pd (),
+							    (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
+{
+ return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
+ __A,
+ (__v4df)
+ __O, __M);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
+{
+  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df)
+							    __A,
+							    (__v4df)
+							    _mm256_setzero_pd (),
+							    __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_i64x2 (__m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
+							    __A,
+							    (__v4di)
+							    _mm256_setzero_si256 (),
+							    (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
+ __A,
+ (__v4di)
+ __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di)
+ __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_f32x2 (__m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
+							   (__v8sf)
+							   _mm256_setzero_ps (),
+							   (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
+ (__v8sf) __O,
+ __M);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_i32x2 (__m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
+							    __A,
+							    (__v8si)
+							    _mm256_setzero_si256 (),
+							    (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
+ __A,
+ (__v8si)
+ __O, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si)
+ __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcast_i32x2 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
+							    __A,
+							    (__v4si)
+							    _mm_setzero_si128 (),
+							    (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
+ __A,
+ (__v4si)
+ __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si)
+ __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
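+/* mullo_epi64 (VPMULLQ, introduced with AVX512DQ) multiplies 64-bit
+   elements and keeps the low 64 bits of each product, an operation
+   that previously had to be emulated with 32-bit multiplies.  */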
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mullo_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
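+/* andnot computes (~__A) & __B bitwise on the floating-point bit
+   patterns.  */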
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_epi64 (__m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epi64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_epu64 (__m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epu64 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
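+
+/* These VCVTPS2QQ/VCVTPS2UQQ intrinsics widen: the 256-bit forms turn
+   the four floats of a __m128 into four (unsigned) 64-bit lanes of a
+   __m256i, and the 128-bit forms convert only the two low floats of
+   their argument into a two-lane __m128i.  A sketch with arbitrary
+   sample values:
+
+     __m128 f = _mm_set_ps (8.0f, 4.0f, 2.0f, 1.0f);
+     __m256i q = _mm256_cvtps_epi64 (f);    yields { 1, 2, 4, 8 }  */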
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_ps (__m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_ps (__m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu64_ps (__m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu64_ps (__m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
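+
+/* The VCVTQQ2PS/VCVTUQQ2PS intrinsics above narrow: four 64-bit lanes
+   of a __m256i become the four floats of a __m128, and the 128-bit
+   forms convert their two quadword lanes into the two low floats of
+   the result, zeroing the upper pair.  */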
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_pd (__m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_pd (__m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
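+
+/* VCVTQQ2PD (and VCVTUQQ2PD below) keeps the element count: each
+   64-bit integer lane maps to the double lane of the same index in a
+   vector of the same width, so no zero padding is involved.  */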
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu64_pd (__m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A)
+{
+ return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu64_pd (__m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
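+
+/* Unlike their unmasked AVX counterparts, these write-masked
+   and/andnot/or/xor operations on float and double vectors are
+   AVX512DQ instructions, which is why they are declared in this
+   header rather than in avx512vlintrin.h.  */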
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movm_epi32 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movm_epi32 (__mmask8 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movm_epi64 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movm_epi64 (__mmask8 __A)
+{
+ return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi32_mask (__m128i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movepi32_mask (__m256i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi64_mask (__m128i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movepi64_mask (__m256i __A)
+{
+ return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
+}
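+
+/* VPMOVM2D/VPMOVM2Q broadcast each mask bit to every bit of the
+   corresponding lane (all ones when set, zero when clear), while
+   VPMOVD2M/VPMOVQ2M extract the most significant bit of each lane.
+   A round trip with an arbitrary sample mask:
+
+     __mmask8 m = 0x9;
+     __m256i v = _mm256_movm_epi64 (m);       lanes { ~0, 0, 0, ~0 }
+     __mmask8 r = _mm256_movepi64_mask (v);   r == 0x9  */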
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf64x2_pd (__m256d __A, const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
+ __imm,
+ (__v2df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
+ __imm,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
+ __imm,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
+ __imm,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8)
+ __U);
+}
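+
+/* The extract, insert, reduce and range intrinsics in this block take
+   an immediate operand, so the inline-function forms are only valid
+   when the compiler can fold __imm to a constant; when not
+   optimizing they are provided instead as the equivalent macros in
+   the #else branch further down.  */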
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_pd (__m256d __A, int __B)
+{
+ return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B)
+{
+ return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B)
+{
+ return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_pd (__m128d __A, int __B)
+{
+ return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B)
+{
+ return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B)
+{
+ return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_reduce_ps (__m256 __A, int __B)
+{
+ return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B)
+{
+ return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B)
+{
+ return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_reduce_ps (__m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
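+
+/* For VREDUCEPD/VREDUCEPS, __B is an imm8 whose upper four bits give
+   the number of fraction bits M to keep and whose low bits select
+   the rounding mode; the result is, roughly, __A minus __A rounded
+   at 2^-M granularity (the "reduced argument").  */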
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_range_pd (__m256d __A, __m256d __B, int __C)
+{
+ return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
+ (__v4df) __B, __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_range_pd (__m256d __W, __mmask8 __U,
+ __m256d __A, __m256d __B, int __C)
+{
+ return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
+ (__v4df) __B, __C,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C)
+{
+ return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A,
+ (__v4df) __B, __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_pd (__m128d __A, __m128d __B, int __C)
+{
+ return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
+ (__v2df) __B, __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_range_pd (__m128d __W, __mmask8 __U,
+ __m128d __A, __m128d __B, int __C)
+{
+ return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
+ (__v2df) __B, __C,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C)
+{
+ return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A,
+ (__v2df) __B, __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_range_ps (__m256 __A, __m256 __B, int __C)
+{
+ return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
+ (__v8sf) __B, __C,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
+ int __C)
+{
+ return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
+ (__v8sf) __B, __C,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C)
+{
+ return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A,
+ (__v8sf) __B, __C,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_range_ps (__m128 __A, __m128 __B, int __C)
+{
+ return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_range_ps (__m128 __W, __mmask8 __U,
+ __m128 __A, __m128 __B, int __C)
+{
+ return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C)
+{
+ return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A,
+ (__v4sf) __B, __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
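+
+/* For VRANGEPD/VRANGEPS, the imm8 __C selects the operation: its low
+   two bits choose among min, max, absolute min and absolute max of
+   each pair of lanes, and the next two bits control the sign of the
+   result.  */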
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A,
+ const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fpclass_pd_mask (__m256d __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A,
+ __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fpclass_ps_mask (__m256 __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A,
+ __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fpclass_pd_mask (__m128d __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A,
+ __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
+ __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fpclass_ps_mask (__m128 __A, const int __imm)
+{
+ return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A,
+ __imm,
+ (__mmask8) -1);
+}
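+
+/* VFPCLASSPD/VFPCLASSPS test each lane against the categories
+   selected by __imm, one bit per category (QNaN, positive zero,
+   negative zero, positive infinity, negative infinity, denormal,
+   finite negative and SNaN, from bit 0 upwards), and set the
+   corresponding mask bit when any selected category matches.  */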
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v4di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A,
+ (__v2di) __B,
+ __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm)
+{
+ return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A,
+ __m128d __B, const int __imm)
+{
+ return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v4df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A,
+ (__v2df) __B,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+#else
+#define _mm256_insertf64x2(X, Y, C) \
+ ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_insertf64x2(W, U, X, Y, C) \
+ ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_insertf64x2(U, X, Y, C) \
+ ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (__v2df)(__m128d) (Y), (int) (C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm256_inserti64x2(X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask8)-1))
+
+#define _mm256_mask_inserti64x2(W, U, X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_inserti64x2(U, X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (__v2di)(__m128i) (Y), (int) (C), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask8)(U)))
+
+#define _mm256_extractf64x2_pd(X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))
+
+#define _mm256_mask_extractf64x2_pd(W, U, X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U)))
+
+#define _mm256_maskz_extractf64x2_pd(U, X, C) \
+ ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\
+ (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
+
+#define _mm256_extracti64x2_epi64(X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))
+
+#define _mm256_mask_extracti64x2_epi64(W, U, X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U)))
+
+#define _mm256_maskz_extracti64x2_epi64(U, X, C) \
+ ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
+ (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))
+
+#define _mm256_reduce_pd(A, B) \
+ ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
+
+#define _mm256_mask_reduce_pd(W, U, A, B) \
+ ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_reduce_pd(U, A, B) \
+ ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
+
+#define _mm_reduce_pd(A, B) \
+ ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1))
+
+#define _mm_mask_reduce_pd(W, U, A, B) \
+ ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_reduce_pd(U, A, B) \
+ ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
+
+#define _mm256_reduce_ps(A, B) \
+ ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
+
+#define _mm256_mask_reduce_ps(W, U, A, B) \
+ ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_reduce_ps(U, A, B) \
+ ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
+
+#define _mm_reduce_ps(A, B) \
+ ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
+
+#define _mm_mask_reduce_ps(W, U, A, B) \
+ ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
+
+#define _mm_maskz_reduce_ps(U, A, B) \
+ ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
+
+#define _mm256_range_pd(A, B, C) \
+ ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
+
+#define _mm256_maskz_range_pd(U, A, B, C) \
+ ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
+
+#define _mm_range_pd(A, B, C) \
+ ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)_mm_setzero_pd(), (__mmask8)-1))
+
+#define _mm256_range_ps(A, B, C) \
+ ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
+
+#define _mm256_mask_range_ps(W, U, A, B, C) \
+ ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)(__m256)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_range_ps(U, A, B, C) \
+ ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
+
+#define _mm_range_ps(A, B, C) \
+ ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
+
+#define _mm_mask_range_ps(W, U, A, B, C) \
+ ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)(__m128)(W), (__mmask8)(U)))
+
+#define _mm_maskz_range_ps(U, A, B, C) \
+ ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
+
+#define _mm256_mask_range_pd(W, U, A, B, C) \
+ ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)(__m256d)(W), (__mmask8)(U)))
+
+#define _mm_mask_range_pd(W, U, A, B, C) \
+ ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_range_pd(U, A, B, C) \
+ ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
+
+#define _mm256_mask_fpclass_pd_mask(u, X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
+ (int) (C),(__mmask8)(u)))
+
+#define _mm256_mask_fpclass_ps_mask(u, X, C) \
+ ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
+ (int) (C),(__mmask8)(u)))
+
+#define _mm_mask_fpclass_pd_mask(u, X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
+ (int) (C),(__mmask8)(u)))
+
+#define _mm_mask_fpclass_ps_mask(u, X, C) \
+ ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
+ (int) (C),(__mmask8)(u)))
+
+#define _mm256_fpclass_pd_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \
+ (int) (C),(__mmask8)-1))
+
+#define _mm256_fpclass_ps_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \
+ (int) (C),(__mmask8)-1))
+
+#define _mm_fpclass_pd_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \
+ (int) (C),(__mmask8)-1))
+
+#define _mm_fpclass_ps_mask(X, C) \
+ ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \
+ (int) (C),(__mmask8)-1))
+
+#endif
+
+#ifdef __DISABLE_AVX512VLDQ__
+#undef __DISABLE_AVX512VLDQ__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VLDQ__ */
+
+#endif /* _AVX512VLDQINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
new file mode 100644
index 00000000000..5c45e8b5d9b
--- /dev/null
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -0,0 +1,13213 @@
+/* Copyright (C) 2014
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VLINTRIN_H_INCLUDED
+#define _AVX512VLINTRIN_H_INCLUDED
+
+/* Doesn't require avx512vl target and is used in avx512dqintrin.h. */
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_di (void)
+{
+ return __extension__ (__m128i)(__v2di){ 0, 0};
+}
+
+#ifndef __AVX512VL__
+#pragma GCC push_options
+#pragma GCC target("avx512vl")
+#define __DISABLE_AVX512VL__
+#endif /* __AVX512VL__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef unsigned int __mmask32;
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_pd (__mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
+{
+ __builtin_ia32_storeapd256_mask ((__v4df *) __P,
+ (__v4df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
+{
+ __builtin_ia32_storeapd128_mask ((__v2df *) __P,
+ (__v2df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_ps (__mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
+{
+ __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
+ (__v8sf) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
+{
+ __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
+ (__v4sf) __A,
+ (__mmask8) __U);
+}
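+
+/* A masked load only replaces the active lanes; a write-masked store
+   leaves the memory of inactive lanes untouched.  A sketch with an
+   arbitrary mask, assuming p points to a 32-byte-aligned array of
+   four doubles:
+
+     __m256d v = _mm256_maskz_load_pd (0x3, p);   lanes 0-1 loaded, 2-3 zeroed
+     _mm256_mask_store_pd (p, 0xc, v);            only lanes 2-3 written  */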
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
+ (__v4di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
+ (__v4di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
+ (__v2di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
+ (__v8si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
+ (__v4si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
+ (__v8si) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
+ (__v4si) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_hi (void)
+{
+ return __extension__ (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
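+
+/* Masked arithmetic follows the same pattern as the masked moves and
+   logic operations: for an arbitrary sample mask,
+
+     _mm256_mask_add_pd (w, 0x5, a, b)
+
+   places a + b in lanes 0 and 2 and keeps w in lanes 1 and 3, while
+   the maskz form zeroes lanes 1 and 3 instead.  */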
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_epi64 (void const *__P)
+{
+ return *(__m256i *) __P;
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_epi64 (void const *__P)
+{
+ return *(__m128i *) __P;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_epi64 (void *__P, __m256i __A)
+{
+ *(__m256i *) __P = __A;
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_epi64 (void *__P, __m128i __A)
+{
+ *(__m128i *) __P = __A;
+}
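+
+/* These unmasked load/store intrinsics are plain dereferences of the
+   vector type, so __P must have the natural 32- or 16-byte alignment;
+   the loadu/storeu intrinsics that follow use unaligned accesses
+   instead.  */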
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
+{
+ __builtin_ia32_storeupd256_mask ((__v4df *) __P,
+ (__v4df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
+{
+ __builtin_ia32_storeupd128_mask ((__v2df *) __P,
+ (__v2df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
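+
+/* Illustrative (hypothetical) zero-masked load:
+
+     __m128 v = _mm_maskz_loadu_ps (0x7, p);
+
+   loads p[0..2] and zeroes element 3.  Masked-off elements are not
+   accessed, so they do not fault even on unmapped memory.  */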
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
+{
+ __builtin_ia32_storeups256_mask ((__v8sf *) __P,
+ (__v8sf) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
+{
+ __builtin_ia32_storeups128_mask ((__v4sf *) __P,
+ (__v4sf) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
+ (__v4di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
+ (__v2di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
+ (__v8si) __A,
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
+ (__v4si) __A,
+ (__mmask8) __U);
+}
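+
+/* Illustrative (hypothetical) masked store:
+
+     _mm_mask_storeu_epi32 (p, 0x3, v);
+
+   writes elements 0 and 1 of v to p and leaves the memory for
+   elements 2 and 3 untouched.  */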
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_abs_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
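+
+/* There is no 64-bit integer absolute-value instruction before AVX-512,
+   so even the unmasked abs_epi64 forms go through the masked builtin
+   with an all-ones mask, (__mmask8) -1.  */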
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_epu32 (__m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_epu32 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
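+
+/* The cvtt* conversions truncate toward zero; the cvt* forms round
+   according to the current MXCSR rounding mode.  Inputs that cannot be
+   represented raise the invalid exception and produce the integer
+   indefinite value.  */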
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttps_epu32 (__m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_epu32 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttpd_epu32 (__m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_epu32 (__m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
+{
+ return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu32_pd (__m128i __A)
+{
+ return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_pd (__m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
+{
+ return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
+{
+ return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepu32_ps (__m256i __A)
+{
+ return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
+{
+ return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_ps (__m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
+{
+ return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
+{
+ return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
+{
+ return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
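+
+/* The pmov down-conversions below come in three flavours: cvtepi*
+   truncates (keeps the low bits), cvtsepi* saturates as signed and
+   cvtusepi* saturates as unsigned.  The uninitialized __O in the
+   unmasked forms deliberately passes a don't-care merge operand; with
+   an all-ones mask the builtin never reads it.  */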
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_epi8 (__m128i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_epi8 (__m256i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi32_epi8 (__m128i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi32_epi8 (__m256i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi32_epi8 (__m128i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi32_epi8 (__m256i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
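+
+/* Illustrative (hypothetical) unsigned saturating narrow:
+
+     __m128i r = _mm256_cvtusepi32_epi8 (_mm256_set1_epi32 (300));
+
+   yields 0xff in the low eight bytes (300 saturates to 255) and zeroes
+   in the upper eight.  */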
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_epi16 (__m128i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_epi16 (__m256i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi32_epi16 (__m128i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi32_epi16 (__m256i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi32_epi16 (__m128i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi32_epi16 (__m256i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_epi8 (__m128i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_epi8 (__m256i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi64_epi8 (__m128i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi64_epi8 (__m256i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
+ (__v16qi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi64_epi8 (__m128i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi64_epi8 (__m256i __A)
+{
+ __v16qi __O;
+ return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
+ (__v16qi) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_epi16 (__m128i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_epi16 (__m256i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi64_epi16 (__m128i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi64_epi16 (__m256i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi64_epi16 (__m128i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi64_epi16 (__m256i __A)
+{
+ __v8hi __O;
+ return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
+ (__v8hi) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi64_epi32 (__m128i __A)
+{
+ __v4si __O;
+ return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
+ (__v4si) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi64_epi32 (__m256i __A)
+{
+ __v4si __O;
+ return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
+ (__v4si) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsepi64_epi32 (__m128i __A)
+{
+ __v4si __O;
+ return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
+ (__v4si) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtsepi64_epi32 (__m256i __A)
+{
+ __v4si __O;
+ return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
+ (__v4si) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtusepi64_epi32 (__m128i __A)
+{
+ __v4si __O;
+ return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
+ (__v4si) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtusepi64_epi32 (__m256i __A)
+{
+ __v4si __O;
+ return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, __O,
+ (__mmask8) -1);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
+ (__v4si) __O, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
+{
+ return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
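+
+/* The mask_cvt*_storeu_* forms above narrow straight to memory, writing
+   only the elements whose mask bit is set; no other bytes at *__P are
+   modified.  */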
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
+ (__v8sf) __O,
+ __M);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ __M);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
+{
+ return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
+ (__v4sf) __O,
+ __M);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
+{
+ return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ __M);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
+{
+ return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
+ (__v4df) __O,
+ __M);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
+{
+ return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
+ (__v8si) __O,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
+ (__v4si) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
+ (__v4di) __O,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
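+/* Broadcasting a 64-bit scalar from a GPR requires a 64-bit register,
+   so it is only available in 64-bit mode; in 32-bit mode the value is
+   passed through memory instead (the *_mem_mask builtins). */
+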
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+ return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
+ __M);
+#else
+ return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A, (__v4di) __O,
+ __M);
+#endif
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+ return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+#else
+ return (__m256i) __builtin_ia32_pbroadcastq256_mem_mask (__A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+#endif
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
+ (__v2di) __O,
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+ return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
+ __M);
+#else
+ return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A, (__v2di) __O,
+ __M);
+#endif
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
+{
+#ifdef __x86_64__
+ return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+#else
+ return (__m128i) __builtin_ia32_pbroadcastq128_mem_mask (__A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+#endif
+}
+
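+/* Broadcast a 128-bit vector to both 128-bit lanes of a 256-bit
+   vector. */
+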
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_f32x4 (__m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+							   (__v8sf)
+							   _mm256_setzero_ps (),
+							   (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+ (__v8sf) __O,
+ __M);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
+{
+ return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_i32x4 (__m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
+							    (__v8si)
+							    _mm256_setzero_si256 (),
+							    (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
+							    (__v8si) __O,
+							    __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
+ __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
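+/* Masked sign-extending conversions (vpmovsx*). */
+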
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
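+/* Masked zero-extending conversions (vpmovzx*). */
+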
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
+{
+ return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
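+/* Approximate reciprocal, with a maximum relative error of 2^-14. */
+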
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp14_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_pd (__m128d __A)
+{
+ return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp14_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp14_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
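+/* Approximate reciprocal square root, with a maximum relative error
+   of 2^-14. */
+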
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rsqrt14_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_pd (__m128d __A)
+{
+ return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rsqrt14_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt14_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
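+/* Masked square root. */
+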
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
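+/* Masked integer addition and subtraction. */
+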
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
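+/* Extract the exponent of each element as a floating-point value,
+   i.e. floor (log2 (|x|)). */
+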
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getexp_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getexp_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getexp_pd (__m128d __A)
+{
+ return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
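+/* Masked logical right shifts by the count held in the low 64 bits
+   of __B. */
+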
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
+						 (__v2di) __B,
+						 (__v2di)
+						 _mm_setzero_si128 (),
+						 (__mmask8) __U);
+}
+
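+/* Masked bitwise AND on 32-bit elements. */
+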
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
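+/* Scale by powers of two: each result element is __A * 2^floor (__B). */
+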
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_scalef_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_scalef_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
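+/* Masked fused multiply-add family.  In the _mask forms, elements with
+   a clear mask bit are copied from __A; in the _mask3 forms they are
+   copied from __C; the _maskz forms zero them. */
+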
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
+ __mmask8 __U)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
+ __mmask8 __U)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
+ __mmask8 __U)
+{
+ return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
+ __mmask8 __U)
+{
+ return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
+ __mmask8 __U)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
+ __mmask8 __U)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
+ __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
+ __mmask8 __U)
+{
+ return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
+ __mmask8 __U)
+{
+ return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
+ __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
+ __mmask8 __U)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
+ __mmask8 __U)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
+ __mmask8 __U)
+{
+ return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256d __C)
+{
+ return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
+ (__v4df) __B,
+ -(__v4df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
+ __mmask8 __U)
+{
+ return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128d __C)
+{
+ return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
+ (__v2df) __B,
+ -(__v2df) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
+ __mmask8 __U)
+{
+ return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256 __C)
+{
+ return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
+ (__v8sf) __B,
+ -(__v8sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
+{
+ return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
+ (__v4sf) __B,
+ -(__v4sf) __C,
+ (__mmask8) __U);
+}
+
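+/* Masked bitwise logic (AND, ANDNOT, OR, XOR) on 32-bit lanes.  The
+   _mask forms take the corresponding lane of __W where the mask bit is
+   clear; the _maskz forms take zero there.  */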
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
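+/* Masked conversions: cvtpd2ps narrows double to single precision;
+   cvtps2dq and cvtps2udq convert singles to signed resp. unsigned
+   32-bit integers using the current rounding mode.  */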
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
+{
+ return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
+{
+ return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
+{
+ return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_epu32 (__m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
+{
+ return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epu32 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
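+/* Masked duplication shuffles: movedup duplicates the even-indexed
+   doubles, movehdup the odd-indexed singles, moveldup the even-indexed
+   singles.  */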
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
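+/* Masked interleaves of 32- and 64-bit elements from the high
+   (unpackhi) or low (unpacklo) halves of each 128-bit lane.  */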
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
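+/* Integer comparisons producing a bitmask, one bit per lane; the
+   unmasked forms pass an all-ones mask and the _mask forms AND the
+   result with __U.  Illustrative use (not part of this header):
+
+     __mmask8 m = _mm_cmpeq_epi32_mask (a, b);
+     if (m & 1)
+       ;  lane 0 of a equals lane 0 of b   */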
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
+ (__v4si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
+ (__v8si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
+ (__v2di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
+ (__v4di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
+ (__v4si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
+ (__v8si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
+ (__v2di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
+ (__v4di) __B, __U);
+}
+
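+/* Per-lane test instructions: ptestm sets a mask bit where
+   (__A & __B) is nonzero, ptestnm where it is zero.  */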
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_epi32_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
+ (__v4si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_test_epi32_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
+ (__v8si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_test_epi64_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
+ (__v2di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_test_epi64_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
+ (__v4di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testn_epi32_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
+ (__v4si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
+ (__v8si) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testn_epi64_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
+ (__v2di) __B, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
+ (__v4di) __B, __U);
+}
+
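+/* Compress: pack the lanes selected by __U contiguously into the low
+   part of the destination; compressstoreu writes only the selected
+   lanes to an unaligned address.  */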
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
+{
+ __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
+ (__v4df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
+{
+ __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
+ (__v2df) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
+{
+ __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
+ (__v8sf) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
+{
+ __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
+ (__v4sf) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
+ (__v4di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
+ (__v2di) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
+{
+ __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
+ (__v8si) __A,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
+{
+ __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
+ (__v4si) __A,
+ (__mmask8) __U);
+}
+
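+/* Expand: the inverse of compress; consecutive source elements (or
+   loaded elements, for expandloadu) are scattered into the lanes
+   selected by __U.  */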
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
+{
+ return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
+ (__v4df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
+{
+ return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
+ (__v2df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
+{
+ return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
+{
+ return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
+{
+ return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
+{
+ return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
+{
+ return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
+ void const *__P)
+{
+ return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
+ (__v4di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
+ void const *__P)
+{
+ return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
+ (__v8si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
+ (__v4si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
+{
+ return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
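+/* Two-source permutes: each index in __I selects an element from the
+   concatenation of __A and __B.  In the _mask form, lanes with a clear
+   mask bit keep __A; in the _mask2 form they keep the corresponding
+   bits of the index vector __I; in the _maskz form they are zeroed.  */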
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4df) __A,
+ (__v4df) __B,
+							 (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4df) __A,
+ (__v4df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
+ (__v4di) __I
+ /* idx */ ,
+ (__v4df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
+ /* idx */ ,
+ (__v4df) __A,
+ (__v4df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
+{
+ return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8sf) __A,
+ (__v8sf) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8sf) __A,
+ (__v8sf) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
+ (__v8si) __I
+ /* idx */ ,
+ (__v8sf) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
+ /* idx */ ,
+ (__v8sf) __A,
+ (__v8sf) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
+ (__v2di) __I
+ /* idx */ ,
+ (__v2di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
+ /* idx */ ,
+ (__v2di) __A,
+ (__v2di) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4si) __A,
+ (__v4si) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
+ (__v4si) __I
+ /* idx */ ,
+ (__v4si) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
+ /* idx */ ,
+ (__v4si) __A,
+ (__v4si) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
+ /* idx */ ,
+ (__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
+ __mmask8 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
+ (__v4di) __I
+ /* idx */ ,
+ (__v4di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
+ __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
+ /* idx */ ,
+ (__v4di) __A,
+ (__v4di) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
+ /* idx */ ,
+ (__v8si) __A,
+ (__v8si) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
+ __mmask8 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
+ (__v8si) __I
+ /* idx */ ,
+ (__v8si) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
+ __m256i __I, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
+ /* idx */ ,
+ (__v8si) __A,
+ (__v8si) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2df) __A,
+ (__v2df) __B,
+							  (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
+ /* idx */ ,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
+ (__v2di) __I
+ /* idx */ ,
+ (__v2df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
+ /* idx */ ,
+ (__v2df) __A,
+ (__v2df) __B,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
+{
+ return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
+ __m128 __B)
+{
+ return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
+ /* idx */ ,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
+ __m128 __B)
+{
+ return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
+ (__v4si) __I
+ /* idx */ ,
+ (__v4sf) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
+ __m128 __B)
+{
+ return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
+ /* idx */ ,
+ (__v4sf) __A,
+ (__v4sf) __B,
+ (__mmask8)
+ __U);
+}
+
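+/* Per-lane variable shifts: sllv shifts left, srlv shifts right
+   logically, srav shifts right arithmetically.  AVX512VL supplies the
+   64-bit arithmetic right shift (srav_epi64) that AVX2 lacks.  */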
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srav_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
+ (__v2di) __Y,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
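+
+/* Usage sketch (illustrative comment, not part of this header): the
+   _mask shift forms merge unselected lanes from the first operand,
+   while the _maskz forms zero them.  E.g., assuming __m256i values
+   SRC, V and a per-lane count CNT:
+
+     __m256i r = _mm256_mask_sllv_epi64 (src, 0x5, v, cnt);
+
+   shifts lanes 0 and 2 of V left and copies lanes 1 and 3 from SRC;
+   _mm256_maskz_sllv_epi64 (0x5, v, cnt) zeroes lanes 1 and 3.  */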
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rolv_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rolv_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rorv_epi32 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rorv_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rolv_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rolv_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rorv_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rorv_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
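+
+/* Usage sketch (illustrative comment, not part of this header):
+   the rolv/rorv forms rotate each lane by the per-lane count in the
+   second operand, taken modulo the element width.  E.g.
+
+     __m128i r = _mm_rolv_epi32 (_mm_set1_epi32 (0x80000001),
+                                 _mm_set1_epi32 (1));
+
+   yields 0x00000003 in every lane; _mm_rorv_epi32 rotates right.  */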
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srav_epi64 (__m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
+ (__v4di) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
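+
+/* Usage sketch (illustrative comment, not part of this header): the
+   masked bitwise forms follow the same merge/zero convention; the
+   andnot forms compute (~A) & B as _mm256_andnot_si256 does.  E.g.
+
+     __m256i r = _mm256_maskz_xor_epi64 (0x3, a, b);
+
+   computes A ^ B in lanes 0 and 1 and zeroes lanes 2 and 3.  */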
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
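+
+/* Usage sketch (illustrative comment, not part of this header):
+   masked-off lanes of these arithmetic forms are not computed, so
+   they raise no FP exceptions.  A guarded division, e.g.
+
+     __m256d r = _mm256_mask_div_pd (fallback, ok, num, den);
+
+   divides only the lanes whose bit is set in OK and takes FALLBACK
+   elsewhere.  */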
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epi64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_epu64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_epu64 (__m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
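+
+/* Usage sketch (illustrative comment, not part of this header):
+   the 64-bit integer min/max forms are new with AVX512VL (SSE4.1 and
+   AVX2 stop at 32-bit elements).  A per-lane signed clamp:
+
+     __m256i r = _mm256_min_epi64 (_mm256_max_epi64 (v, lo), hi);
+
+   with LO and HI holding the per-lane bounds.  */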
+
+#ifndef __AVX512CD__
+#pragma GCC push_options
+#pragma GCC target("avx512vl,avx512cd")
+#define __DISABLE_AVX512VLCD__
+#endif
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastmb_epi64 (__mmask8 __A)
+{
+ return (__m128i) __builtin_ia32_broadcastmb128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastmb_epi64 (__mmask8 __A)
+{
+ return (__m256i) __builtin_ia32_broadcastmb256 (__A);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcastmw_epi32 (__mmask16 __A)
+{
+ return (__m128i) __builtin_ia32_broadcastmw128 (__A);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcastmw_epi32 (__mmask16 __A)
+{
+ return (__m256i) __builtin_ia32_broadcastmw256 (__A);
+}
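+
+/* Usage sketch (illustrative comment, not part of this header):
+   _mm_broadcastmb_epi64 zero-extends the 8-bit mask value into every
+   64-bit lane, so __A = 0x5 gives { 5, 5 }; the broadcastmw forms do
+   the same with a 16-bit mask into 32-bit lanes.  */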
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_lzcnt_epi32 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_lzcnt_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_conflict_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_conflict_epi32 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lzcnt_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lzcnt_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_conflict_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_conflict_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
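+/* Usage sketch (illustrative comment, not part of this header):
+   _mm256_conflict_epi32 sets, in each lane, one bit per earlier lane
+   holding the same value, so an all-zero result means the lanes are
+   pairwise distinct:
+
+     __mmask8 dup = _mm256_cmpneq_epi32_mask (_mm256_conflict_epi32 (v),
+                                              _mm256_setzero_si256 ());
+
+   (_mm256_cmpneq_epi32_mask is assumed from the AVX512VL comparison
+   intrinsics declared elsewhere in this header.)  */
+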
+#ifdef __DISABLE_AVX512VLCD__
+#pragma GCC pop_options
+#endif
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B)
+{
+ return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
+{
+ return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
+{
+ return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sra_epi64 (__m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
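+
+/* Usage sketch (illustrative comment, not part of this header):
+   64-bit arithmetic right shifts (psraq) first appear here; SSE2 and
+   AVX2 provide only logical 64-bit shifts.  E.g.
+
+     __m128i r = _mm_sra_epi64 (_mm_set1_epi64x (-8),
+                                _mm_cvtsi32_si128 (1));
+
+   yields -4 in both lanes, propagating the sign bit.  */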
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
+ (__v4si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
+{
+ return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
+ (__v2di) __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
+ __m256 __Y)
+{
+ return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
+ (__v8si) __X,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
+ (__v8si) __X,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
+ (__v4di) __X,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
+ __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
+ (__v4di) __X,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
+ (__v4di) __X,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256i __C)
+{
+ return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
+ (__v4di) __C,
+ (__v4df) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
+{
+ return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
+ (__v4di) __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256i __C)
+{
+ return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
+ (__v8si) __C,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
+{
+ return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
+ (__v8si) __C,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128i __C)
+{
+ return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
+ (__v2di) __C,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
+{
+ return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
+ (__v2di) __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
+ __m128i __C)
+{
+ return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
+ (__v4si) __C,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
+{
+ return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
+ (__v4si) __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
+ (__v4di) __X,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
+ __m256i __B)
+{
+ return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
+ __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __W, __M);
+}
+
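+/* The mul_epi32/mul_epu32 intrinsics here wrap vpmuldq/vpmuludq: the
+   low doubleword of each quadword lane of __X and __Y is sign- (epi32)
+   respectively zero- (epu32) extended and multiplied into a full
+   64-bit product, hence __v8si/__v4si sources but __v4di/__v2di
+   results.  */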
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
+ (__v4di) __X,
+ (__v4di) __W,
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v4di) __W, __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
+ (__v8si) __X,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
+ (__v8si) __Y,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
+ __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v2di) __W, __M);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
+ (__v4si) __Y,
+ (__v2di)
+ _mm_setzero_si128 (),
+ __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
+{
+ return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
+ (__v8si) __X,
+ (__v8si) __W,
+ __M);
+}
+
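+/* The intrinsics below require a compile-time-constant immediate
+   operand, so inline-function definitions are only usable when the
+   argument folds to a constant, i.e. under __OPTIMIZE__; by the usual
+   convention in these headers a matching #else block provides
+   equivalent #define forms.  */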
+#ifdef __OPTIMIZE__
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
+ __m256i __X, const int __I)
+{
+ return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
+ __I,
+ (__v4di) __W,
+ (__mmask8) __M);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
+{
+ return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
+ __I,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __M);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B, const int __imm)
+{
+ return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
+ (__v4df) __B, __imm,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
+ (__v4df) __B, __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ __m128d __B, const int __imm)
+{
+ return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
+ (__v2df) __B, __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B, const int __imm)
+{
+ return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
+ (__v8sf) __B, __imm,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
+ (__v8sf) __B, __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
+ (__v4sf) __B, __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
+ (__v4si) __B,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
+ (__v4si) __B,
+ __imm,
+ (__v8si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
+ (__v4si) __B,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
+{
+ return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
+ (__v4sf) __B,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
+ __m128 __B, const int __imm)
+{
+ return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
+ (__v4sf) __B,
+ __imm,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
+ (__v4sf) __B,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
+ __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
+ __imm,
+ (__v4si) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
+ __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf32x4_ps (__m256 __A, const int __imm)
+{
+ return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
+ __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
+ __imm,
+ (__v4sf) __W,
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
+ __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8)
+ __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
+ (__v4di) __B,
+ __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
+ (__v4di) __B,
+ __imm,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
+ (__v4di) __B,
+ __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
+ (__v8si) __B,
+ __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
+{
+ return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
+ (__v4df) __B,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
+ __m256d __B, const int __imm)
+{
+ return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
+ (__v4df) __B,
+ __imm,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
+ (__v4df) __B,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
+{
+ return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
+ __m256 __B, const int __imm)
+{
+ return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
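+/* vfixupimm: each element of __B is first classified into one of
+   eight special-value tokens (zero, one, NaN, +/-infinity and so on);
+   the token indexes a 4-bit field in the corresponding element of the
+   integer table __C, which selects the fixed-up result, while the
+   immediate controls exception reporting.  This is only a summary;
+   see the Intel SDM for the exact token/response encoding.  */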
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_fixupimmpd256 ((__v4df) __A,
+ (__v4df) __B,
+ (__v4di) __C, __imm);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
+ __m256i __C, const int __imm)
+{
+ return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
+ (__v4df) __B,
+ (__v4di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
+ __m256i __C, const int __imm)
+{
+ return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ (__v4di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_fixupimmps256 ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8si) __C, __imm);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
+ __m256i __C, const int __imm)
+{
+ return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8si) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
+ __m256i __C, const int __imm)
+{
+ return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8si) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmpd128 ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C, __imm);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmps128 ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C, __imm);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
+ __m128i __C, const int __imm)
+{
+ return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4si) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
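+/* Masked logical right shifts by immediate.  As with the unmasked
+   vpsrld/vpsrlq, a count of __imm greater than or equal to the
+   element width yields zero in every active element.  */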
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
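+/* vpternlog: __imm is an 8-bit truth table applied bitwise to the
+   three sources, indexed by (bit of __A) << 2 | (bit of __B) << 1
+   | (bit of __C).  For example, 0x96 computes the three-way XOR
+   __A ^ __B ^ __C and 0xE8 the bitwise majority function.  */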
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __C, __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
+ __m256i __B, __m256i __C,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __C, __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
+ __m256i __B, __m256i __C,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
+ (__v4di) __B,
+ (__v4di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __C, __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
+ __m256i __B, __m256i __C,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __C, __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
+ __m256i __B, __m256i __C,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
+ (__v8si) __B,
+ (__v8si) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
+ __m128i __B, __m128i __C, const int __imm)
+{
+ return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __C, __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
+ __m128i __B, __m128i __C, const int __imm)
+{
+ return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
+ (__v2di) __B,
+ (__v2di) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
+ __m128i __B, __m128i __C, const int __imm)
+{
+ return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __C, __imm,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
+ __m128i __B, __m128i __C, const int __imm)
+{
+ return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
+ (__v4si) __B,
+ (__v4si) __C,
+ __imm,
+ (__mmask8) __U);
+}
+
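+/* vrndscale: each element is rounded to 2**-M precision, M being bits
+   7:4 of __imm; bits 1:0 select the rounding mode (overridden by the
+   MXCSR mode when bit 2 is set) and bit 3 suppresses the precision
+   exception.  E.g. __imm == 0x01 (M == 0, round toward -inf) floors
+   each element to an integral value.  */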
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_roundscale_ps (__m256 __A, const int __imm)
+{
+ return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ const int __imm)
+{
+ return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
+ __imm,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
+{
+ return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
+ __imm,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_roundscale_pd (__m256d __A, const int __imm)
+{
+ return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
+ __imm,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
+{
+ return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
+ __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_ps (__m128 __A, const int __imm)
+{
+ return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
+ __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
+ const int __imm)
+{
+ return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
+ __imm,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
+{
+ return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
+ __imm,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roundscale_pd (__m128d __A, const int __imm)
+{
+ return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ const int __imm)
+{
+ return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
+ __imm,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
+{
+ return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
+ __imm,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
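+/* vgetmant: the builtin takes a single immediate, so the interval
+   selector __B (_MM_MANT_NORM_*) is placed in bits 1:0 and the sign
+   control __C (_MM_MANT_SIGN_*) in bits 3:2, hence the
+   (__C << 2) | __B below.  */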
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
+ (__C << 2) | __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
+ (__C << 2) | __B,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
+ (__C << 2) | __B,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
+ (__C << 2) | __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
+ (__C << 2) | __B,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
+ (__C << 2) | __B,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
+ (__C << 2) | __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
+ (__C << 2) | __B,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
+ (__C << 2) | __B,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
+ (__C << 2) | __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
+ (__C << 2) | __B,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
+ (__C << 2) | __B,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
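+/* Masked gathers.  Element i of the result is loaded from
+   __addr + __index[i] * __scale when bit i of __mask is set and is
+   copied from __v1_old otherwise; __scale must be 1, 2, 4 or 8.
+   Unlike the AVX2 gathers, the mask is a __mmask8 rather than a
+   vector.  */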
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
+ __m256i __index, float const *__addr,
+ int __scale)
+{
+ return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
+ __addr,
+ (__v8si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
+ __m128i __index, float const *__addr,
+ int __scale)
+{
+ return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
+ __m128i __index, double const *__addr,
+ int __scale)
+{
+ return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
+ __m128i __index, double const *__addr,
+ int __scale)
+{
+ return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
+ __m256i __index, float const *__addr,
+ int __scale)
+{
+ return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
+ __addr,
+ (__v4di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
+ __m128i __index, float const *__addr,
+ int __scale)
+{
+ return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
+ __addr,
+ (__v2di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
+ __m256i __index, double const *__addr,
+ int __scale)
+{
+ return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
+ __addr,
+ (__v4di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
+ __m128i __index, double const *__addr,
+ int __scale)
+{
+ return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
+ __addr,
+ (__v2di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
+ __m256i __index, int const *__addr,
+ int __scale)
+{
+ return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
+ __addr,
+ (__v8si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
+ __m128i __index, int const *__addr,
+ int __scale)
+{
+ return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
+ __m128i __index, long long const *__addr,
+ int __scale)
+{
+ return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
+ __m128i __index, long long const *__addr,
+ int __scale)
+{
+ return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
+ __addr,
+ (__v4si) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
+ __m256i __index, int const *__addr,
+ int __scale)
+{
+ return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
+ __addr,
+ (__v4di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
+ __m128i __index, int const *__addr,
+ int __scale)
+{
+ return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
+ __addr,
+ (__v2di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
+ __m256i __index, long long const *__addr,
+ int __scale)
+{
+ return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
+ __addr,
+ (__v4di) __index,
+ __mask, __scale);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
+ __m128i __index, long long const *__addr,
+ int __scale)
+{
+ return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
+ __addr,
+ (__v2di) __index,
+ __mask, __scale);
+}
+
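+/* Scatters.  Element i of __v1 is stored to
+   __addr + __index[i] * __scale when bit i of the mask is set; the
+   unmasked forms simply pass an all-ones (__mmask8) 0xFF mask.
+   __scale must be 1, 2, 4 or 8.  */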
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32scatter_ps (float *__addr, __m256i __index,
+ __m256 __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
+ (__v8si) __index, (__v8sf) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
+ __m256i __index, __m256 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
+ (__v8sf) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32scatter_ps (float *__addr, __m128i __index, __m128 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v4sf) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32scatter_ps (float *__addr, __mmask8 __mask,
+ __m128i __index, __m128 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
+ (__v4sf) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32scatter_pd (double *__addr, __m128i __index,
+ __m256d __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v4df) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
+ __m128i __index, __m256d __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
+ (__v4df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32scatter_pd (double *__addr, __m128i __index,
+ __m128d __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v2df) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
+ __m128i __index, __m128d __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
+ (__v2df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64scatter_ps (float *__addr, __m256i __index,
+ __m128 __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
+ (__v4di) __index, (__v4sf) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
+ __m256i __index, __m128 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
+ (__v4sf) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64scatter_ps (float *__addr, __m128i __index, __m128 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
+ (__v2di) __index, (__v4sf) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
+ __m128i __index, __m128 __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
+ (__v4sf) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64scatter_pd (double *__addr, __m256i __index,
+ __m256d __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
+ (__v4di) __index, (__v4df) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
+ __m256i __index, __m256d __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
+ (__v4df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64scatter_pd (double *__addr, __m128i __index,
+ __m128d __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
+ (__v2di) __index, (__v2df) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
+ __m128i __index, __m128d __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
+ (__v2df) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32scatter_epi32 (int *__addr, __m256i __index,
+ __m256i __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
+ (__v8si) __index, (__v8si) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
+ __m256i __index, __m256i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
+ (__v8si) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32scatter_epi32 (int *__addr, __m128i __index,
+ __m128i __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v4si) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32scatter_epi32 (int *__addr, __mmask8 __mask,
+ __m128i __index, __m128i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
+ (__v4si) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i32scatter_epi64 (long long *__addr, __m128i __index,
+ __m256i __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v4di) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
+ __m128i __index, __m256i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
+ (__v4di) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i32scatter_epi64 (long long *__addr, __m128i __index,
+ __m128i __v1, const int __scale)
+{
+ __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
+ (__v4si) __index, (__v2di) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
+ __m128i __index, __m128i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
+ (__v2di) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64scatter_epi32 (int *__addr, __m256i __index,
+ __m128i __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
+ (__v4di) __index, (__v4si) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
+ __m256i __index, __m128i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
+ (__v4si) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64scatter_epi32 (int *__addr, __m128i __index,
+ __m128i __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
+ (__v2di) __index, (__v4si) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
+ __m128i __index, __m128i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
+ (__v4si) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_i64scatter_epi64 (long long *__addr, __m256i __index,
+ __m256i __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
+ (__v4di) __index, (__v4di) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
+ __m256i __index, __m256i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
+ (__v4di) __v1, __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_i64scatter_epi64 (long long *__addr, __m128i __index,
+ __m128i __v1, const int __scale)
+{
+ __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
+ (__v2di) __index, (__v2di) __v1,
+ __scale);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
+ __m128i __index, __m128i __v1,
+ const int __scale)
+{
+ __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
+ (__v2di) __v1, __scale);
+}
+
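+/* Masked vpshufd.  __mask is the usual pshufd control byte; the
+   _MM_PERM_ENUM values defined with the AVX-512F intrinsics
+   (e.g. _MM_PERM_AAAA) spell out the four 2-bit source selectors.  */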
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
+ _MM_PERM_ENUM __mask)
+{
+ return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
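+/* vprold/vprord and the quadword forms below rotate each element left
+   respectively right by __B bit positions, the count being taken
+   modulo the element width.  */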
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rol_epi32 (__m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __B)
+{
+ return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rol_epi32 (__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __B)
+{
+ return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ror_epi32 (__m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __B)
+{
+ return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ror_epi32 (__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __B)
+{
+ return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rol_epi64 (__m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __B)
+{
+ return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rol_epi64 (__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __B)
+{
+ return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ror_epi64 (__m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __B)
+{
+ return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
+{
+ return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ror_epi64 (__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __B)
+{
+ return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
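+/* valignd/valignq: the concatenation of __A (high half) and __B (low
+   half) is shifted right by __imm 32-bit respectively 64-bit elements
+   and the low half of the result is kept, subject to the usual
+   write-mask rules.  */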
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
+ (__v4si) __B, __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
+ (__v4si) __B, __imm,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
+ (__v4si) __B, __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
+ (__v2di) __B, __imm,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ __m128i __B, const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
+ (__v2di) __B, __imm,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
+ (__v2di) __B, __imm,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
+ (__v8si) __B, __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
+ (__v8si) __B, __imm,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
+ (__v8si) __B, __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
+ (__v4di) __B, __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ __m256i __B, const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
+ (__v4di) __B, __imm,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
+ (__v4di) __B, __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
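+/* Masked conversions of packed single-precision to half-precision
+   values (vcvtps2ph); __I is the rounding-control immediate.  */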
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
+ const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
+ const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
+{
+ return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
+ (__v8hi)
+ _mm_setzero_hi (),
+ (__mmask8) __U);
+}
+
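+/* Arithmetic right shifts by immediate.  The 64-bit element forms
+   (vpsraq) are new with AVX-512; earlier ISAs provided no 64-bit
+   arithmetic shift.  */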
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_srai_epi64 (__m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ const int __imm)
+{
+ return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
+{
+ return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_epi64 (__m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
+ const int __imm)
+{
+ return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
+{
+ return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
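+/* Masked logical left shifts by immediate (vpslld/vpsllq).  */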
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
+{
+ return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
+ (__v2di)
+ _mm_setzero_di (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
+ int __B)
+{
+ return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
+{
+ return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
+ int __B)
+{
+ return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
+{
+ return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
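+/* Masked permutes: permutex_pd (vpermpd) reorders 64-bit elements
+   across the whole vector, while permute_pd/_ps (vpermilpd/vpermilps)
+   reorder elements within each 128-bit lane.  */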
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
+ const int __imm)
+{
+ return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
+{
+ return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
+ const int __C)
+{
+ return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
+{
+ return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
+ (__v4df)
+ _mm256_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
+ const int __C)
+{
+ return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
+{
+ return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
+ (__v2df)
+ _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
+ const int __C)
+{
+ return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
+{
+ return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
+ (__v8sf)
+ _mm256_setzero_ps (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
+ const int __C)
+{
+ return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
+{
+ return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
+ (__v4sf)
+ _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
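+/* Mask blends (vblendm*): element i of the result comes from __W when
+   bit i of __U is set and from __A otherwise.  A usage sketch with
+   hypothetical vectors a and b:
+     __m256d r = _mm256_mask_blend_pd (0x5, a, b);
+   takes lanes 0 and 2 from b and lanes 1 and 3 from a.  */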
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
+{
+ return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
+ (__v4df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
+{
+ return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
+ (__v8sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
+{
+ return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
+ (__v4di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
+{
+ return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
+{
+ return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
+{
+ return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+ return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
+ (__v2di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
+{
+ return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
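+/* Full predicate comparisons producing a compact bit mask.  For the
+   integer forms __P uses the _MM_CMPINT_* encodings (0 EQ, 1 LT, 2 LE,
+   4 NE, 5 NLT/GE, 6 NLE/GT); the floating-point forms take the _CMP_*
+   predicates instead.  The mask variants clear result bits whose
+   corresponding bit in __U is zero.  A sketch with hypothetical
+   vectors a and b:
+     __mmask8 m = _mm256_cmp_epi32_mask (a, b, _MM_CMPINT_LT);
+   sets bit i of m when a[i] < b[i].  */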
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
+ (__v4df) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
+ (__v8sf) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
+ (__v4df) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
+ (__v8sf) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
+ (__v2df) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
+ const int __P)
+{
+ return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
+ (__v4sf) __Y, __P,
+ (__mmask8) __U);
+}
+
+extern __inline __m256d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutex_pd (__m256d __X, const int __M)
+{
+ return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
+ (__v4df)
+ _mm256_undefined_pd (),
+ (__mmask8) -1);
+}
+
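+/* Convenience comparison wrappers hard-coding the predicate:
+   4 is NE, 1 LT, 5 NLT (>=), 2 LE.  */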
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 4,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 1,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 5,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 2,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 4,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 1,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 5,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 2,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 4,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 1,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 5,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
+ (__v8si) __Y, 2,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 4,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 1,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 5,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
+ (__v4di) __Y, 2,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 4,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 1,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 5,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 2,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 4,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 1,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 5,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 2,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 4,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 1,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 5,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
+ (__v4si) __Y, 2,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 4,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 1,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 5,
+ (__mmask8) -1);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
+{
+ return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
+ (__v2di) __Y, 2,
+ (__mmask8) -1);
+}
+
+#else
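+/* When __OPTIMIZE__ is not defined, the immediate arguments of the
+   inline functions above would not fold to compile-time constants, so
+   the immediate-taking intrinsics are provided as macros instead.  */
+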
+#define _mm256_permutex_pd(X, M) \
+ ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
+ (__v4df)(__m256d)_mm256_undefined_pd(),\
+ (__mmask8)-1))
+
+#define _mm256_maskz_permutex_epi64(M, X, I) \
+ ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
+ (int)(I), \
+ (__v4di)(__m256i) \
+ (_mm256_setzero_si256()),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_permutex_epi64(W, M, X, I) \
+ ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
+ (int)(I), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(M)))
+
+#define _mm256_insertf32x4(X, Y, C) \
+ ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (__v4sf)(__m128) (Y), (int) (C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
+ ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (__v4sf)(__m128) (Y), (int) (C), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_insertf32x4(U, X, Y, C) \
+ ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (__v4sf)(__m128) (Y), (int) (C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm256_inserti32x4(X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (__v4si)(__m128i) (Y), (int) (C), \
+ (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (__v4si)(__m128i) (Y), (int) (C), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_inserti32x4(U, X, Y, C) \
+ ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (__v4si)(__m128i) (Y), (int) (C), \
+ (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)(U)))
+
+#define _mm256_extractf32x4_ps(X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (int) (C), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (int) (C), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_extractf32x4_ps(U, X, C) \
+ ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
+ (int) (C), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm256_extracti32x4_epi32(X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)-1))
+
+#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (int) (C), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
+ ((__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X),\
+ (int) (C), (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
+
+#define _mm256_shuffle_i64x2(X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask8)-1))
+
+#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), \
+ (__v4di)(__m256i)(W),\
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
+ (__mmask8)(U)))
+
+#define _mm256_shuffle_i32x4(X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), \
+ (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
+ ((__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), \
+ (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)(U)))
+
+#define _mm256_shuffle_f64x2(X, Y, C) \
+ ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C) \
+ ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(C), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_f64x2(U, X, Y, C) \
+ ((__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm256_shuffle_f32x4(X, Y, C) \
+ ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
+ ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(C), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
+ ((__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
+ ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_pd(U, A, B, C) \
+ ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(),\
+ (__mmask8)(U)))
+
+#define _mm_mask_shuffle_pd(W, U, A, B, C) \
+ ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_shuffle_pd(U, A, B, C) \
+ ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(C), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
+ ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_ps(U, A, B, C) \
+ ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm_mask_shuffle_ps(W, U, A, B, C) \
+ ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_shuffle_ps(U, A, B, C) \
+ ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(C), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U)))
+
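+/* vfixupimm*: table-driven fixup of special floating-point values
+   (NaN, zeroes, infinities); the per-element response codes come from
+   the integer operand Z and the immediate controls exception
+   signalling.  */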
+#define _mm256_fixupimm_pd(X, Y, Z, C) \
+ ((__m256d)__builtin_ia32_fixupimmpd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(Z), (int)(C)))
+
+#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
+ ((__m256d)__builtin_ia32_fixupimmpd256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(Z), (int)(C), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
+ ((__m256d)__builtin_ia32_fixupimmpd256_maskz ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (__v4di)(__m256i)(Z), (int)(C),\
+ (__mmask8)(U)))
+
+#define _mm256_fixupimm_ps(X, Y, Z, C) \
+ ((__m256)__builtin_ia32_fixupimmps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(Z), (int)(C)))
+
+#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
+ ((__m256)__builtin_ia32_fixupimmps256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(Z), (int)(C), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
+ ((__m256)__builtin_ia32_fixupimmps256_maskz ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (__v8si)(__m256i)(Z), (int)(C),\
+ (__mmask8)(U)))
+
+#define _mm_fixupimm_pd(X, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmpd128 ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(Z), (int)(C)))
+
+#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmpd128_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(Z), (int)(C), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
+ ((__m128d)__builtin_ia32_fixupimmpd128_maskz ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (__v2di)(__m128i)(Z), (int)(C),\
+ (__mmask8)(U)))
+
+#define _mm_fixupimm_ps(X, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmps128 ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(Z), (int)(C)))
+
+#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmps128_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(Z), (int)(C),\
+ (__mmask8)(U)))
+
+#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
+ ((__m128)__builtin_ia32_fixupimmps128_maskz ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), \
+ (__v4si)(__m128i)(Z), (int)(C),\
+ (__mmask8)(U)))
+
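+/* Masked logical right shifts by immediate (vpsrld/vpsrlq).  */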
+#define _mm256_mask_srli_epi32(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
+ (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_srli_epi32(U, A, B) \
+ ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
+ (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
+
+#define _mm_mask_srli_epi32(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
+ (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srli_epi32(U, A, B) \
+ ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
+ (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
+
+#define _mm256_mask_srli_epi64(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
+ (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_srli_epi64(U, A, B) \
+ ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
+ (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
+
+#define _mm_mask_srli_epi64(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
+ (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srli_epi64(U, A, B) \
+ ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
+ (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+
+#define _mm256_mask_slli_epi32(W, U, X, C) \
+ ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
+ (__v8si)(__m256i)(W),\
+ (__mmask8)(U)))
+
+#define _mm256_maskz_slli_epi32(U, X, C) \
+ ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
+ (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__mmask8)(U)))
+
+#define _mm256_mask_slli_epi64(W, U, X, C) \
+ ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
+ (__v4di)(__m256i)(W),\
+ (__mmask8)(U)))
+
+#define _mm256_maskz_slli_epi64(U, X, C) \
+ ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm_mask_slli_epi32(W, U, X, C) \
+ ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
+ (__v4si)(__m128i)(W),\
+ (__mmask8)(U)))
+
+#define _mm_maskz_slli_epi32(U, X, C) \
+ ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
+ (__v4si)(__m128i)_mm_setzero_si128 (),\
+ (__mmask8)(U)))
+
+#define _mm_mask_slli_epi64(W, U, X, C) \
+ ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
+ (__v2di)(__m128i)(W),\
+ (__mmask8)(U)))
+
+#define _mm_maskz_slli_epi64(U, X, C) \
+ ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
+ (__v2di)(__m128i)_mm_setzero_di(),\
+ (__mmask8)(U)))
+
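+/* vpternlog*: the immediate is an 8-entry truth table over the three
+   inputs (A contributes 0xF0, B 0xCC, C 0xAA), so e.g. 0x96 computes
+   A ^ B ^ C bit by bit.  */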
+#define _mm256_ternarylogic_epi64(A, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))
+
+#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm256_ternarylogic_epi32(A, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))
+
+#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
+ ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm_ternarylogic_epi64(A, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))
+
+#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm_ternarylogic_epi32(A, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))
+
+#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
+
+#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
+ ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
+
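+/* vrndscale*: round each element to a given number of fraction bits;
+   the immediate packs the fraction-bit count and the rounding mode.  */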
+#define _mm256_roundscale_ps(A, B) \
+ ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)-1))
+
+#define _mm256_mask_roundscale_ps(W, U, A, B) \
+ ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_roundscale_ps(U, A, B) \
+ ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \
+ (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)(U)))
+
+#define _mm256_roundscale_pd(A, B) \
+ ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)-1))
+
+#define _mm256_mask_roundscale_pd(W, U, A, B) \
+ ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_roundscale_pd(U, A, B) \
+ ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
+
+#define _mm_roundscale_ps(A, B) \
+ ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)-1))
+
+#define _mm_mask_roundscale_ps(W, U, A, B) \
+ ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U)))
+
+#define _mm_maskz_roundscale_ps(U, A, B) \
+ ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A), \
+ (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))
+
+#define _mm_roundscale_pd(A, B) \
+ ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)-1))
+
+#define _mm_mask_roundscale_pd(W, U, A, B) \
+ ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_roundscale_pd(U, A, B) \
+ ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A), \
+ (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U)))
+
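+/* vgetmant*: extract the normalized mantissa of each element.  B
+   selects the normalization interval and C the sign control; the
+   builtins take them packed as (C << 2) | B.  */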
+#define _mm256_getmant_ps(X, B, C) \
+ ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_getmant_ps(W, U, X, B, C) \
+ ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)(__m256)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_getmant_ps(U, X, B, C) \
+ ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm_getmant_ps(X, B, C) \
+ ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)-1))
+
+#define _mm_mask_getmant_ps(W, U, X, B, C) \
+ ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)(__m128)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_getmant_ps(U, X, B, C) \
+ ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm256_getmant_pd(X, B, C) \
+ ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)-1))
+
+#define _mm256_mask_getmant_pd(W, U, X, B, C) \
+ ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_getmant_pd(U, X, B, C) \
+ ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm_getmant_pd(X, B, C) \
+ ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)-1))
+
+#define _mm_mask_getmant_pd(W, U, X, B, C) \
+ ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)(__m128d)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_getmant_pd(U, X, B, C) \
+ ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X), \
+ (int)(((C)<<2) | (B)), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)(U)))
+
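+/* Masked gathers: elements whose bit in MASK is clear keep their value
+   from V1OLD; set bits load from ADDR + INDEX[i] * SCALE.  */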
+#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)(V1OLD), \
+ (float const *)(ADDR), \
+ (__v8si)(__m256i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)(V1OLD), \
+ (float const *)(ADDR), \
+ (__v4si)(__m128i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)(V1OLD), \
+ (double const *)(ADDR), \
+ (__v4si)(__m128i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)(V1OLD), \
+ (double const *)(ADDR), \
+ (__v4si)(__m128i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)(V1OLD), \
+ (float const *)(ADDR), \
+ (__v4di)(__m256i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)(V1OLD), \
+ (float const *)(ADDR), \
+ (__v2di)(__m128i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)(V1OLD), \
+ (double const *)(ADDR), \
+ (__v4di)(__m256i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)(V1OLD), \
+ (double const *)(ADDR), \
+ (__v2di)(__m128i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)(V1OLD), \
+ (int const *)(ADDR), \
+ (__v8si)(__m256i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)(V1OLD), \
+ (int const *)(ADDR), \
+ (__v4si)(__m128i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)(V1OLD), \
+ (long long const *)(ADDR), \
+ (__v4si)(__m128i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)(V1OLD), \
+ (long long const *)(ADDR), \
+ (__v4si)(__m128i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)(V1OLD), \
+ (int const *)(ADDR), \
+ (__v4di)(__m256i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)(V1OLD), \
+ (int const *)(ADDR), \
+ (__v2di)(__m128i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)(V1OLD), \
+ (long long const *)(ADDR), \
+ (__v4di)(__m256i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
+#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
+ ((__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)(V1OLD), \
+ (long long const *)(ADDR), \
+ (__v2di)(__m128i)(INDEX), \
+ (__mmask8)(MASK), (int)(SCALE)))
+
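+/* Scatters: the unmasked forms pass an all-ones mask (0xFF), the mask
+   forms store only the elements selected by MASK.  */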
+#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8sf ((float *)(ADDR), (__mmask8)0xFF, \
+ (__v8si)(__m256i)(INDEX), \
+ (__v8sf)(__m256)(V1), (int)(SCALE))
+
+#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8sf ((float *)(ADDR), (__mmask8)(MASK), \
+ (__v8si)(__m256i)(INDEX), \
+ (__v8sf)(__m256)(V1), (int)(SCALE))
+
+#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4sf ((float *)(ADDR), (__mmask8)0xFF, \
+ (__v4si)(__m128i)(INDEX), \
+ (__v4sf)(__m128)(V1), (int)(SCALE))
+
+#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4sf ((float *)(ADDR), (__mmask8)(MASK), \
+ (__v4si)(__m128i)(INDEX), \
+ (__v4sf)(__m128)(V1), (int)(SCALE))
+
+#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4df ((double *)(ADDR), (__mmask8)0xFF, \
+ (__v4si)(__m128i)(INDEX), \
+ (__v4df)(__m256d)(V1), (int)(SCALE))
+
+#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4df ((double *)(ADDR), (__mmask8)(MASK), \
+ (__v4si)(__m128i)(INDEX), \
+ (__v4df)(__m256d)(V1), (int)(SCALE))
+
+#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv2df ((double *)(ADDR), (__mmask8)0xFF, \
+ (__v4si)(__m128i)(INDEX), \
+ (__v2df)(__m128d)(V1), (int)(SCALE))
+
+#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv2df ((double *)(ADDR), (__mmask8)(MASK), \
+ (__v4si)(__m128i)(INDEX), \
+ (__v2df)(__m128d)(V1), (int)(SCALE))
+
+#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8sf ((float *)(ADDR), (__mmask8)0xFF, \
+ (__v4di)(__m256i)(INDEX), \
+ (__v4sf)(__m128)(V1), (int)(SCALE))
+
+#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8sf ((float *)(ADDR), (__mmask8)(MASK), \
+ (__v4di)(__m256i)(INDEX), \
+ (__v4sf)(__m128)(V1), (int)(SCALE))
+
+#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4sf ((float *)(ADDR), (__mmask8)0xFF, \
+ (__v2di)(__m128i)(INDEX), \
+ (__v4sf)(__m128)(V1), (int)(SCALE))
+
+#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4sf ((float *)(ADDR), (__mmask8)(MASK), \
+ (__v2di)(__m128i)(INDEX), \
+ (__v4sf)(__m128)(V1), (int)(SCALE))
+
+#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4df ((double *)(ADDR), (__mmask8)0xFF, \
+ (__v4di)(__m256i)(INDEX), \
+ (__v4df)(__m256d)(V1), (int)(SCALE))
+
+#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4df ((double *)(ADDR), (__mmask8)(MASK), \
+ (__v4di)(__m256i)(INDEX), \
+ (__v4df)(__m256d)(V1), (int)(SCALE))
+
+#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv2df ((double *)(ADDR), (__mmask8)0xFF, \
+ (__v2di)(__m128i)(INDEX), \
+ (__v2df)(__m128d)(V1), (int)(SCALE))
+
+#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv2df ((double *)(ADDR), (__mmask8)(MASK), \
+ (__v2di)(__m128i)(INDEX), \
+ (__v2df)(__m128d)(V1), (int)(SCALE))
+
+#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8si ((int *)(ADDR), (__mmask8)0xFF, \
+ (__v8si)(__m256i)(INDEX), \
+ (__v8si)(__m256i)(V1), (int)(SCALE))
+
+#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv8si ((int *)(ADDR), (__mmask8)(MASK), \
+ (__v8si)(__m256i)(INDEX), \
+ (__v8si)(__m256i)(V1), (int)(SCALE))
+
+#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4si ((int *)(ADDR), (__mmask8)0xFF, \
+ (__v4si)(__m128i)(INDEX), \
+ (__v4si)(__m128i)(V1), (int)(SCALE))
+
+#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4si ((int *)(ADDR), (__mmask8)(MASK), \
+ (__v4si)(__m128i)(INDEX), \
+ (__v4si)(__m128i)(V1), (int)(SCALE))
+
+#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4di ((long long *)(ADDR), (__mmask8)0xFF, \
+ (__v4si)(__m128i)(INDEX), \
+ (__v4di)(__m256i)(V1), (int)(SCALE))
+
+#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv4di ((long long *)(ADDR), (__mmask8)(MASK), \
+ (__v4si)(__m128i)(INDEX), \
+ (__v4di)(__m256i)(V1), (int)(SCALE))
+
+#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv2di ((long long *)(ADDR), (__mmask8)0xFF, \
+ (__v4si)(__m128i)(INDEX), \
+ (__v2di)(__m128i)(V1), (int)(SCALE))
+
+#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scattersiv2di ((long long *)(ADDR), (__mmask8)(MASK), \
+ (__v4si)(__m128i)(INDEX), \
+ (__v2di)(__m128i)(V1), (int)(SCALE))
+
+#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8si ((int *)(ADDR), (__mmask8)0xFF, \
+ (__v4di)(__m256i)(INDEX), \
+ (__v4si)(__m128i)(V1), (int)(SCALE))
+
+#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv8si ((int *)(ADDR), (__mmask8)(MASK), \
+ (__v4di)(__m256i)(INDEX), \
+ (__v4si)(__m128i)(V1), (int)(SCALE))
+
+#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4si ((int *)(ADDR), (__mmask8)0xFF, \
+ (__v2di)(__m128i)(INDEX), \
+ (__v4si)(__m128i)(V1), (int)(SCALE))
+
+#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4si ((int *)(ADDR), (__mmask8)(MASK), \
+ (__v2di)(__m128i)(INDEX), \
+ (__v4si)(__m128i)(V1), (int)(SCALE))
+
+#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4di ((long long *)(ADDR), (__mmask8)0xFF, \
+ (__v4di)(__m256i)(INDEX), \
+ (__v4di)(__m256i)(V1), (int)(SCALE))
+
+#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv4di ((long long *)(ADDR), (__mmask8)(MASK), \
+ (__v4di)(__m256i)(INDEX), \
+ (__v4di)(__m256i)(V1), (int)(SCALE))
+
+#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv2di ((long long *)(ADDR), (__mmask8)0xFF, \
+ (__v2di)(__m128i)(INDEX), \
+ (__v2di)(__m128i)(V1), (int)(SCALE))
+
+#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
+ __builtin_ia32_scatterdiv2di ((long long *)(ADDR), (__mmask8)(MASK), \
+ (__v2di)(__m128i)(INDEX), \
+ (__v2di)(__m128i)(V1), (int)(SCALE))
+
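A minimal usage sketch for the scatter macros above, assuming -mavx512vl:

  #include <immintrin.h>

  /* Store val[i] to dst[idx[i]] for each set bit of k; lanes whose
     mask bit is clear leave memory untouched.  SCALE 8 means the
     64-bit indices count in units of doubles.  */
  void
  scatter_masked (double *dst, __mmask8 k, __m128i idx, __m128d val)
  {
    _mm_mask_i64scatter_pd (dst, k, idx, val, 8);
  }
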
+#define _mm256_mask_shuffle_epi32(W, U, X, C) \
+ ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_shuffle_epi32(U, X, C) \
+ ((__m256i) __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C), \
+ (__v8si)(__m256i)_mm256_setzero_si256(), \
+ (__mmask8)(U)))
+
+#define _mm_mask_shuffle_epi32(W, U, X, C) \
+ ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_shuffle_epi32(U, X, C) \
+ ((__m128i) __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__mmask8)(U)))
+
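A sketch of masked dword shuffling with the macros above; _MM_SHUFFLE
from xmmintrin.h builds the control immediate:

  #include <immintrin.h>

  /* Broadcast dword 0 of x into the lanes selected by k; lanes with
     a clear mask bit keep the corresponding element of src.  */
  __m128i
  bcast_lane0 (__m128i src, __mmask8 k, __m128i x)
  {
    return _mm_mask_shuffle_epi32 (src, k, x, _MM_SHUFFLE (0, 0, 0, 0));
  }
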
+#define _mm256_rol_epi64(A, B) \
+ ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)-1))
+
+#define _mm256_mask_rol_epi64(W, U, A, B) \
+ ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_rol_epi64(U, A, B) \
+ ((__m256i)__builtin_ia32_prolq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm_rol_epi64(A, B) \
+ ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)_mm_setzero_di(), \
+ (__mmask8)-1))
+
+#define _mm_mask_rol_epi64(W, U, A, B) \
+ ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_rol_epi64(U, A, B) \
+ ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)_mm_setzero_di(), \
+ (__mmask8)(U)))
+
+#define _mm256_ror_epi64(A, B) \
+ ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)-1))
+
+#define _mm256_mask_ror_epi64(W, U, A, B) \
+ ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_ror_epi64(U, A, B) \
+ ((__m256i)__builtin_ia32_prorq256_mask ((__v4di)(__m256i)(A), (int)(B), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm_ror_epi64(A, B) \
+ ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)_mm_setzero_di(), \
+ (__mmask8)-1))
+
+#define _mm_mask_ror_epi64(W, U, A, B) \
+ ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_ror_epi64(U, A, B) \
+ ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B), \
+ (__v2di)(__m128i)_mm_setzero_di(), \
+ (__mmask8)(U)))
+
+#define _mm256_rol_epi32(A, B) \
+ ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__mmask8)-1))
+
+#define _mm256_mask_rol_epi32(W, U, A, B) \
+ ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_rol_epi32(U, A, B) \
+ ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__mmask8)(U)))
+
+#define _mm_rol_epi32(A, B) \
+ ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__mmask8)-1))
+
+#define _mm_mask_rol_epi32(W, U, A, B) \
+ ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_rol_epi32(U, A, B) \
+ ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__mmask8)(U)))
+
+#define _mm256_ror_epi32(A, B) \
+ ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__mmask8)-1))
+
+#define _mm256_mask_ror_epi32(W, U, A, B) \
+ ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_ror_epi32(U, A, B) \
+ ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B), \
+ (__v8si)(__m256i)_mm256_setzero_si256(),\
+ (__mmask8)(U)))
+
+#define _mm_ror_epi32(A, B) \
+ ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__mmask8)-1))
+
+#define _mm_mask_ror_epi32(W, U, A, B) \
+ ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)(W), \
+ (__mmask8)(U)))
+
+#define _mm_maskz_ror_epi32(U, A, B) \
+ ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
+ (__mmask8)(U)))
+
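The rotate macros take an immediate count; a quick sketch, assuming
-mavx512vl:

  #include <immintrin.h>

  /* Rotate each 32-bit lane left by 8 bits: 0x11223344 -> 0x22334411.  */
  __m128i
  rotl8 (__m128i x)
  {
    return _mm_rol_epi32 (x, 8);
  }
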
+#define _mm256_alignr_epi32(X, Y, C) \
+ ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(X), (__mmask8)-1))
+
+#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
+ ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
+ ((__m256i)__builtin_ia32_alignd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(C), (__v8si)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm256_alignr_epi64(X, Y, C) \
+ ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(X), (__mmask8)-1))
+
+#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
+ ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
+ ((__m256i)__builtin_ia32_alignq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(C), (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__mmask8)(U)))
+
+#define _mm_alignr_epi32(X, Y, C) \
+ ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(X), (__mmask8)-1))
+
+#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
+ ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_alignr_epi32(U, X, Y, C) \
+ ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128(),\
+ (__mmask8)(U)))
+
+#define _mm_alignr_epi64(X, Y, C) \
+ ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(X), (__mmask8)-1))
+
+#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
+ ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
+    (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_alignr_epi64(U, X, Y, C) \
+ ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128(),\
+ (__mmask8)(U)))
+
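valignd/valignq shift a two-vector concatenation by whole elements
rather than bytes; a sketch:

  #include <immintrin.h>

  /* Concatenate hi:lo (hi in the upper half), shift right by 3
     dwords, keep the low 8: result = { lo[3..7], hi[0..2] }.  */
  __m256i
  align3 (__m256i hi, __m256i lo)
  {
    return _mm256_alignr_epi32 (hi, lo, 3);
  }
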
+#define _mm_mask_cvtps_ph(W, U, A, I) \
+  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
+ (__v8hi)(__m128i) (W), (__mmask8) (U)))
+
+#define _mm_maskz_cvtps_ph(U, A, I) \
+  ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) (A), (int) (I), \
+ (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
+
+#define _mm256_mask_cvtps_ph(W, U, A, I) \
+  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
+ (__v8hi)(__m128i) (W), (__mmask8) (U)))
+
+#define _mm256_maskz_cvtps_ph(U, A, I) \
+  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) (A), (int) (I), \
+ (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
+
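A sketch of the masked float-to-half conversion; here
_MM_FROUND_TO_NEAREST_INT is assumed to select round-to-nearest-even
in the cvtps2ph immediate:

  #include <immintrin.h>

  /* Convert four floats to half precision, merging into src where
     the mask bit is clear.  */
  __m128i
  to_half (__m128i src, __mmask8 k, __m128 x)
  {
    return _mm_mask_cvtps_ph (src, k, x, _MM_FROUND_TO_NEAREST_INT);
  }
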
+#define _mm256_mask_srai_epi32(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
+ (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_srai_epi32(U, A, B) \
+ ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
+ (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
+
+#define _mm_mask_srai_epi32(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
+ (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srai_epi32(U, A, B) \
+ ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
+ (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
+
+#define _mm256_srai_epi64(A, B) \
+ ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
+ (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))
+
+#define _mm256_mask_srai_epi64(W, U, A, B) \
+ ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
+ (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_srai_epi64(U, A, B) \
+ ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
+ (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
+
+#define _mm_srai_epi64(A, B) \
+ ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
+ (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)-1))
+
+#define _mm_mask_srai_epi64(W, U, A, B) \
+ ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
+ (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
+
+#define _mm_maskz_srai_epi64(U, A, B) \
+ ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
+ (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+
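vpsraq fills a long-standing gap — SSE2/AVX2 provide no 64-bit
arithmetic right shift; a sketch:

  #include <immintrin.h>

  /* Arithmetic (sign-propagating) right shift of each 64-bit lane.  */
  __m128i
  ashr64 (__m128i x)
  {
    return _mm_srai_epi64 (x, 3);
  }
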
+#define _mm256_mask_permutex_pd(W, U, A, B) \
+ ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_permutex_pd(U, A, B) \
+ ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
+ (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
+
+#define _mm256_mask_permute_pd(W, U, X, C) \
+ ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
+ (__v4df)(__m256d)(W), \
+ (__mmask8)(U)))
+
+#define _mm256_maskz_permute_pd(U, X, C) \
+ ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C), \
+ (__v4df)(__m256d)_mm256_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm256_mask_permute_ps(W, U, X, C) \
+ ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
+ (__v8sf)(__m256)(W), (__mmask8)(U)))
+
+#define _mm256_maskz_permute_ps(U, X, C) \
+ ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C), \
+ (__v8sf)(__m256)_mm256_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm_mask_permute_pd(W, U, X, C) \
+ ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
+ (__v2df)(__m128d)(W), (__mmask8)(U)))
+
+#define _mm_maskz_permute_pd(U, X, C) \
+ ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C), \
+ (__v2df)(__m128d)_mm_setzero_pd(), \
+ (__mmask8)(U)))
+
+#define _mm_mask_permute_ps(W, U, X, C) \
+ ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
+ (__v4sf)(__m128)(W), (__mmask8)(U)))
+
+#define _mm_maskz_permute_ps(U, X, C) \
+ ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C), \
+ (__v4sf)(__m128)_mm_setzero_ps(), \
+ (__mmask8)(U)))
+
+#define _mm256_mask_blend_pd(__U, __A, __W) \
+ ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \
+ (__v4df) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm256_mask_blend_ps(__U, __A, __W) \
+ ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \
+ (__v8sf) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm256_mask_blend_epi64(__U, __A, __W) \
+ ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \
+ (__v4di) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm256_mask_blend_epi32(__U, __A, __W) \
+ ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \
+ (__v8si) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm_mask_blend_pd(__U, __A, __W) \
+ ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \
+ (__v2df) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm_mask_blend_ps(__U, __A, __W) \
+ ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \
+ (__v4sf) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm_mask_blend_epi64(__U, __A, __W) \
+ ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \
+ (__v2di) (__W), \
+ (__mmask8) (__U)))
+
+#define _mm_mask_blend_epi32(__U, __A, __W) \
+ ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \
+ (__v4si) (__W), \
+ (__mmask8) (__U)))
+
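The blend macros are per-lane selects driven entirely by a mask
register; a sketch:

  #include <immintrin.h>

  /* result[i] = ((k >> i) & 1) ? b[i] : a[i].  */
  __m256
  select_ps (__mmask8 k, __m256 a, __m256 b)
  {
    return _mm256_mask_blend_ps (k, a, b);
  }
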
+#define _mm256_cmp_epu32_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_cmp_epi64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_cmp_epi32_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_cmp_epu64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_cmp_pd_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_cmp_ps_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di)(__m256i)(X), \
+ (__v4di)(__m256i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmppd256_mask ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_cmp_epi64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_cmp_epi32_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_cmp_epu64_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_cmp_epu32_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_cmp_pd_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_cmp_ps_mask(X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P),\
+ (__mmask8)-1))
+
+#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si)(__m128i)(X), \
+ (__v4si)(__m128i)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmppd128_mask ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P),\
+ (__mmask8)(M)))
+
+#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
+ ((__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P),\
+ (__mmask8)(M)))
+
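The cmp macros return a mask register value rather than a vector; a
sketch, assuming the _MM_CMPINT_* predicates from avx512fintrin.h:

  #include <immintrin.h>

  /* Nonzero iff any signed 32-bit lane of a is less than that of b.  */
  int
  any_less (__m128i a, __m128i b)
  {
    return _mm_cmp_epi32_mask (a, b, _MM_CMPINT_LT) != 0;
  }
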
+#endif
+
+#define _mm256_mask_ceil_ps(A, B, C) _mm256_mask_roundscale_ps((A), (B), (C), _MM_FROUND_CEIL)
+#define _mm256_mask_floor_ps(A, B, C) _mm256_mask_roundscale_ps((A), (B), (C), _MM_FROUND_FLOOR)
+#define _mm256_mask_ceil_pd(A, B, C) _mm256_mask_roundscale_pd((A), (B), (C), _MM_FROUND_CEIL)
+#define _mm256_mask_floor_pd(A, B, C) _mm256_mask_roundscale_pd((A), (B), (C), _MM_FROUND_FLOOR)
+#define _mm256_maskz_ceil_ps(A, B) _mm256_maskz_roundscale_ps((A), (B), _MM_FROUND_CEIL)
+#define _mm256_maskz_floor_ps(A, B) _mm256_maskz_roundscale_ps((A), (B), _MM_FROUND_FLOOR)
+#define _mm256_maskz_ceil_pd(A, B) _mm256_maskz_roundscale_pd((A), (B), _MM_FROUND_CEIL)
+#define _mm256_maskz_floor_pd(A, B) _mm256_maskz_roundscale_pd((A), (B), _MM_FROUND_FLOOR)
+#define _mm_mask_ceil_ps(A, B, C) _mm_mask_roundscale_ps((A), (B), (C), _MM_FROUND_CEIL)
+#define _mm_mask_floor_ps(A, B, C) _mm_mask_roundscale_ps((A), (B), (C), _MM_FROUND_FLOOR)
+#define _mm_mask_ceil_pd(A, B, C) _mm_mask_roundscale_pd((A), (B), (C), _MM_FROUND_CEIL)
+#define _mm_mask_floor_pd(A, B, C) _mm_mask_roundscale_pd((A), (B), (C), _MM_FROUND_FLOOR)
+#define _mm_maskz_ceil_ps(A, B) _mm_maskz_roundscale_ps((A), (B), _MM_FROUND_CEIL)
+#define _mm_maskz_floor_ps(A, B) _mm_maskz_roundscale_ps((A), (B), _MM_FROUND_FLOOR)
+#define _mm_maskz_ceil_pd(A, B) _mm_maskz_roundscale_pd((A), (B), _MM_FROUND_CEIL)
+#define _mm_maskz_floor_pd(A, B) _mm_maskz_roundscale_pd((A), (B), _MM_FROUND_FLOOR)
+#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
+
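The ceil/floor forms above are thin wrappers over roundscale; the
call below expands to _mm256_mask_roundscale_pd (src, k, x,
_MM_FROUND_CEIL):

  #include <immintrin.h>

  __m256d
  ceil_masked (__m256d src, __mmask8 k, __m256d x)
  {
    return _mm256_mask_ceil_pd (src, k, x);
  }
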
+#ifdef __DISABLE_AVX512VL__
+#undef __DISABLE_AVX512VL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VL__ */
+
+#endif /* _AVX512VLINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 7ac22a1683d..a191bd4fd8d 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -73,6 +73,7 @@
#define bit_BMI2 (1 << 8)
#define bit_RTM (1 << 11)
#define bit_AVX512F (1 << 16)
+#define bit_AVX512DQ (1 << 17)
#define bit_RDSEED (1 << 18)
#define bit_ADX (1 << 19)
#define bit_CLFLUSHOPT (1 << 23)
@@ -80,6 +81,8 @@
#define bit_AVX512ER (1 << 27)
#define bit_AVX512CD (1 << 28)
#define bit_SHA (1 << 29)
+#define bit_AVX512BW (1 << 30)
+#define bit_AVX512VL (1 << 31)
/* %ecx */
#define bit_PREFETCHWT1 (1 << 0)
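A runtime-detection sketch using the new feature bits, assuming the
__get_cpuid_max and __cpuid_count helpers from <cpuid.h>:

  #include <cpuid.h>

  int
  have_avx512vl (void)
  {
    unsigned int eax, ebx, ecx, edx;
    if (__get_cpuid_max (0, 0) < 7)
      return 0;
    __cpuid_count (7, 0, eax, ebx, ecx, edx);
    return (ebx & bit_AVX512VL) != 0;
  }
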
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 4cd0b3d2e4b..4935dc63bf0 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -411,6 +411,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
+ unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
bool arch;
@@ -488,6 +489,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_avx512cd = ebx & bit_AVX512CD;
has_sha = ebx & bit_SHA;
has_clflushopt = ebx & bit_CLFLUSHOPT;
+ has_avx512dq = ebx & bit_AVX512DQ;
+ has_avx512bw = ebx & bit_AVX512BW;
+ has_avx512vl = ebx & bit_AVX512VL;
has_prefetchwt1 = ecx & bit_PREFETCHWT1;
}
@@ -900,6 +904,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
+ const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
+ const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
+ const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
@@ -908,7 +915,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
fxsr, xsave, xsaveopt, avx512f, avx512er,
avx512cd, avx512pf, prefetchwt1, clflushopt,
- xsavec, xsaves, NULL);
+ xsavec, xsaves, avx512dq, avx512bw, avx512vl,
+ NULL);
}
done:
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 35c0035a0e6..08a6c90941c 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -56,6 +56,7 @@ DEF_PRIMITIVE_TYPE (UHI, unsigned_intHI_type_node)
DEF_PRIMITIVE_TYPE (USI, unsigned_intSI_type_node)
DEF_PRIMITIVE_TYPE (UDI, long_long_unsigned_type_node)
# ??? Some of the types below should use the mode types above.
+DEF_PRIMITIVE_TYPE (SHORT, short_integer_type_node)
DEF_PRIMITIVE_TYPE (USHORT, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT, integer_type_node)
DEF_PRIMITIVE_TYPE (UINT, unsigned_type_node)
@@ -107,7 +108,14 @@ DEF_VECTOR_TYPE (V16SF, FLOAT)
DEF_VECTOR_TYPE (V8DF, DOUBLE)
DEF_VECTOR_TYPE (V8DI, DI)
DEF_VECTOR_TYPE (V16SI, SI)
+DEF_VECTOR_TYPE (V32HI, HI)
DEF_VECTOR_TYPE (V64QI, QI)
+DEF_VECTOR_TYPE (V12QI, QI)
+DEF_VECTOR_TYPE (V14QI, QI)
+DEF_VECTOR_TYPE (V32SI, SI)
+DEF_VECTOR_TYPE (V8UDI, UDI, V8DI)
+DEF_VECTOR_TYPE (V16USI, USI, V16SI)
+DEF_VECTOR_TYPE (V32UHI, UHI, V32HI)
DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
@@ -119,6 +127,7 @@ DEF_POINTER_TYPE (PCVOID, VOID, CONST)
DEF_POINTER_TYPE (PVOID, VOID)
DEF_POINTER_TYPE (PDOUBLE, DOUBLE)
DEF_POINTER_TYPE (PFLOAT, FLOAT)
+DEF_POINTER_TYPE (PSHORT, SHORT)
DEF_POINTER_TYPE (PUSHORT, USHORT)
DEF_POINTER_TYPE (PINT, INT)
DEF_POINTER_TYPE (PLONGLONG, LONGLONG)
@@ -142,6 +151,9 @@ DEF_POINTER_TYPE (PV16QI, V16QI)
DEF_POINTER_TYPE (PV16HI, V16HI)
DEF_POINTER_TYPE (PV16SI, V16SI)
DEF_POINTER_TYPE (PV16SF, V16SF)
+DEF_POINTER_TYPE (PV32QI, V32QI)
+DEF_POINTER_TYPE (PV32HI, V32HI)
+DEF_POINTER_TYPE (PV64QI, V64QI)
DEF_POINTER_TYPE (PCV2SI, V2SI, CONST)
DEF_POINTER_TYPE (PCV2DF, V2DF, CONST)
@@ -155,9 +167,15 @@ DEF_POINTER_TYPE (PCV16SF, V16SF, CONST)
DEF_POINTER_TYPE (PCV2DI, V2DI, CONST)
DEF_POINTER_TYPE (PCV4SI, V4SI, CONST)
DEF_POINTER_TYPE (PCV4DI, V4DI, CONST)
+DEF_POINTER_TYPE (PCV8HI, V8HI, CONST)
DEF_POINTER_TYPE (PCV8SI, V8SI, CONST)
DEF_POINTER_TYPE (PCV8DI, V8DI, CONST)
+DEF_POINTER_TYPE (PCV16QI, V16QI, CONST)
+DEF_POINTER_TYPE (PCV16HI, V16HI, CONST)
DEF_POINTER_TYPE (PCV16SI, V16SI, CONST)
+DEF_POINTER_TYPE (PCV32QI, V32QI, CONST)
+DEF_POINTER_TYPE (PCV32HI, V32HI, CONST)
+DEF_POINTER_TYPE (PCV64QI, V64QI, CONST)
DEF_FUNCTION_TYPE (FLOAT128)
DEF_FUNCTION_TYPE (UINT64)
@@ -217,12 +235,15 @@ DEF_FUNCTION_TYPE (V8DF, V8DF)
DEF_FUNCTION_TYPE (V4HI, V4HI)
DEF_FUNCTION_TYPE (V4SF, PCFLOAT)
DEF_FUNCTION_TYPE (V4SF, V2DF)
+DEF_FUNCTION_TYPE (V4SF, V2DF, V4SF, QI)
DEF_FUNCTION_TYPE (V4SF, V4DF)
+DEF_FUNCTION_TYPE (V4SF, V4DF, V4SF, QI)
DEF_FUNCTION_TYPE (V4SF, V4SF)
DEF_FUNCTION_TYPE (V4SF, PCV4SF)
DEF_FUNCTION_TYPE (V4SF, V4SI)
DEF_FUNCTION_TYPE (V4SF, V8SF)
DEF_FUNCTION_TYPE (V4SF, V8HI)
+DEF_FUNCTION_TYPE (V4SF, V8HI, V4SF, QI)
DEF_FUNCTION_TYPE (V4SI, V16QI)
DEF_FUNCTION_TYPE (V4SI, V2DF)
DEF_FUNCTION_TYPE (V4SI, V4DF)
@@ -241,6 +262,7 @@ DEF_FUNCTION_TYPE (V8SF, V4SF)
DEF_FUNCTION_TYPE (V8SF, V8SF)
DEF_FUNCTION_TYPE (V8SF, V8SI)
DEF_FUNCTION_TYPE (V8SF, V8HI)
+DEF_FUNCTION_TYPE (V8SF, V8HI, V8SF, QI)
DEF_FUNCTION_TYPE (V16SF, V16SF)
DEF_FUNCTION_TYPE (V8SI, V8DI)
DEF_FUNCTION_TYPE (V8SI, V4SI)
@@ -251,6 +273,9 @@ DEF_FUNCTION_TYPE (V32QI, V32QI)
DEF_FUNCTION_TYPE (V32QI, V16QI)
DEF_FUNCTION_TYPE (V16HI, V16SI)
DEF_FUNCTION_TYPE (V16HI, V16HI)
+DEF_FUNCTION_TYPE (V16SF, V16HI)
+DEF_FUNCTION_TYPE (V16SF, V16HI, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SI)
DEF_FUNCTION_TYPE (V16HI, V8HI)
DEF_FUNCTION_TYPE (V8SI, V8SI)
DEF_FUNCTION_TYPE (VOID, PCVOID)
@@ -266,13 +291,16 @@ DEF_FUNCTION_TYPE (V4DI, V16QI)
DEF_FUNCTION_TYPE (V8SI, V8HI)
DEF_FUNCTION_TYPE (V4DI, V8HI)
DEF_FUNCTION_TYPE (V4DI, V4SI)
+DEF_FUNCTION_TYPE (V4DI, V4DI)
DEF_FUNCTION_TYPE (V4DI, PV4DI)
DEF_FUNCTION_TYPE (V4DI, V2DI)
+DEF_FUNCTION_TYPE (V16SI, V16SF)
DEF_FUNCTION_TYPE (V16SF, FLOAT)
DEF_FUNCTION_TYPE (V16SI, INT)
DEF_FUNCTION_TYPE (V8DF, DOUBLE)
DEF_FUNCTION_TYPE (V8DI, INT64)
DEF_FUNCTION_TYPE (V16SF, V4SF)
+DEF_FUNCTION_TYPE (V16SI, V4SI)
DEF_FUNCTION_TYPE (V8DF, V4DF)
DEF_FUNCTION_TYPE (V8DI, V4DI)
DEF_FUNCTION_TYPE (V16QI, V8DI)
@@ -286,10 +314,38 @@ DEF_FUNCTION_TYPE (V8DI, V8DI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, QI)
DEF_FUNCTION_TYPE (V16SI, PV4SI)
DEF_FUNCTION_TYPE (V16SF, PV4SF)
+DEF_FUNCTION_TYPE (V8DI, PV2DI)
+DEF_FUNCTION_TYPE (V8DF, PV2DF)
+DEF_FUNCTION_TYPE (V4DI, PV2DI)
+DEF_FUNCTION_TYPE (V4DF, PV2DF)
+DEF_FUNCTION_TYPE (V16SI, PV2SI)
+DEF_FUNCTION_TYPE (V16SF, PV2SF)
DEF_FUNCTION_TYPE (V8DI, PV4DI)
DEF_FUNCTION_TYPE (V8DF, PV4DF)
+DEF_FUNCTION_TYPE (V8SF, FLOAT)
+DEF_FUNCTION_TYPE (V4SF, FLOAT)
+DEF_FUNCTION_TYPE (V4DF, DOUBLE)
+DEF_FUNCTION_TYPE (V8SF, PV4SF)
+DEF_FUNCTION_TYPE (V8SI, PV4SI)
+DEF_FUNCTION_TYPE (V4SI, PV2SI)
+DEF_FUNCTION_TYPE (V8SF, PV2SF)
+DEF_FUNCTION_TYPE (V8SI, PV2SI)
+DEF_FUNCTION_TYPE (V16SF, PV8SF)
+DEF_FUNCTION_TYPE (V16SI, PV8SI)
+DEF_FUNCTION_TYPE (V8DI, V8SF)
+DEF_FUNCTION_TYPE (V4DI, V4SF)
+DEF_FUNCTION_TYPE (V2DI, V4SF)
+DEF_FUNCTION_TYPE (V64QI, QI)
+DEF_FUNCTION_TYPE (V32HI, HI)
DEF_FUNCTION_TYPE (V8UHI, V8UHI)
+DEF_FUNCTION_TYPE (V16UHI, V16UHI)
+DEF_FUNCTION_TYPE (V32UHI, V32UHI)
+DEF_FUNCTION_TYPE (V2UDI, V2UDI)
+DEF_FUNCTION_TYPE (V4UDI, V4UDI)
+DEF_FUNCTION_TYPE (V8UDI, V8UDI)
+DEF_FUNCTION_TYPE (V4USI, V4USI)
DEF_FUNCTION_TYPE (V8USI, V8USI)
+DEF_FUNCTION_TYPE (V16USI, V16USI)
DEF_FUNCTION_TYPE (V8DI, PV8DI)
DEF_FUNCTION_TYPE (DI, V2DI, INT)
@@ -332,9 +388,13 @@ DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF)
DEF_FUNCTION_TYPE (V2DF, V4DF, INT)
+DEF_FUNCTION_TYPE (V2DF, V4DF, INT, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V8DF, INT)
+DEF_FUNCTION_TYPE (V2DF, V8DF, INT, V2DF, QI)
DEF_FUNCTION_TYPE (V2DI, V16QI, V16QI)
DEF_FUNCTION_TYPE (V2DI, V2DF, V2DF)
DEF_FUNCTION_TYPE (V2DI, V2DI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, INT, V2DI, QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, SI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V16QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI)
@@ -349,6 +409,7 @@ DEF_FUNCTION_TYPE (V2SI, V2SI, V2SI)
DEF_FUNCTION_TYPE (V2SI, V4HI, V4HI)
DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DI)
DEF_FUNCTION_TYPE (V4DF, V4DF, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, INT)
DEF_FUNCTION_TYPE (V4DF, V8DF, INT)
DEF_FUNCTION_TYPE (V4DF, V8DF, INT, V4DF, QI)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF)
@@ -373,25 +434,36 @@ DEF_FUNCTION_TYPE (V4SF, V4SF, V2SI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SI)
DEF_FUNCTION_TYPE (V4SF, V8SF, INT)
+DEF_FUNCTION_TYPE (V4SF, V8SF, INT, V4SF, QI)
DEF_FUNCTION_TYPE (V4SI, V2DF, V2DF)
+DEF_FUNCTION_TYPE (V4SI, V2DF, V4SI, QI)
DEF_FUNCTION_TYPE (V4SI, V4SF, V4SF)
DEF_FUNCTION_TYPE (V4SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, INT, V4SI, QI)
DEF_FUNCTION_TYPE (V4SI, V4SI, SI)
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI)
DEF_FUNCTION_TYPE (V4SI, V8HI, V8HI)
+DEF_FUNCTION_TYPE (V4SI, V8HI, V8HI, V4SI, QI)
DEF_FUNCTION_TYPE (V4SI, V8SI, INT)
+DEF_FUNCTION_TYPE (V4SI, V8SI, INT, V4SI, QI)
DEF_FUNCTION_TYPE (V4SI, PCV4SI, V4SI)
DEF_FUNCTION_TYPE (V8HI, V16QI, V16QI)
+DEF_FUNCTION_TYPE (V8HI, V16QI, V16QI, V8HI, QI)
DEF_FUNCTION_TYPE (V8HI, V4SI, V4SI)
DEF_FUNCTION_TYPE (V8HI, V8HI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, INT)
DEF_FUNCTION_TYPE (V8HI, V8HI, SI)
DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI)
DEF_FUNCTION_TYPE (V8HI, V8SF, INT)
+DEF_FUNCTION_TYPE (V8HI, V8SF, INT, V8HI, QI)
DEF_FUNCTION_TYPE (V8HI, V4SF, INT)
+DEF_FUNCTION_TYPE (V8HI, V4SF, INT, V8HI, QI)
DEF_FUNCTION_TYPE (V8QI, V4HI, V4HI)
DEF_FUNCTION_TYPE (V8QI, V8QI, V8QI)
DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SI)
DEF_FUNCTION_TYPE (V8SF, V8SF, INT)
+DEF_FUNCTION_TYPE (V8SF, V16SF, INT)
+DEF_FUNCTION_TYPE (V8SF, V16SF, INT, V8SF, QI)
DEF_FUNCTION_TYPE (V16SF, V16SF, INT)
DEF_FUNCTION_TYPE (V4SF, V16SF, INT)
DEF_FUNCTION_TYPE (V4SF, V16SF, INT, V4SF, QI)
@@ -400,29 +472,46 @@ DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI)
DEF_FUNCTION_TYPE (V32QI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V64QI, V32HI, V32HI)
DEF_FUNCTION_TYPE (V16HI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (V32HI, V16SI, V16SI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V2DF, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V2DF, INT, V8DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V4DF, INT, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, QI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DI, INT, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI, INT, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, QI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DI, INT, QI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, HI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, HI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, INT, V16SI, HI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT, HI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT, HI, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI, INT, QI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT, QI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT, QI, INT)
-DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, QI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, QI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V4SF, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V4SF, INT, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V8SF, INT, V16SF, HI)
DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI)
DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI)
+DEF_FUNCTION_TYPE (V32HI, V64QI, V64QI)
+DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI, V16HI, HI)
+DEF_FUNCTION_TYPE (V32HI, V64QI, V64QI, V32HI, SI)
DEF_FUNCTION_TYPE (V16HI, V16HI, V8HI)
DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, INT)
DEF_FUNCTION_TYPE (V16HI, V16HI, INT)
DEF_FUNCTION_TYPE (V16HI, V16SF, INT)
DEF_FUNCTION_TYPE (V16HI, V16SF, INT, V16HI, HI)
@@ -434,13 +523,20 @@ DEF_FUNCTION_TYPE (V8SI, V4DF, V4DF)
DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V16SI, V16SI, V8SI, INT, V16SI, HI)
DEF_FUNCTION_TYPE (V4SI, V16SI, INT)
DEF_FUNCTION_TYPE (V4SI, V16SI, INT, V4SI, QI)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, INT, V16SI, HI)
DEF_FUNCTION_TYPE (V8SI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V16SI, V32HI, V32HI)
+DEF_FUNCTION_TYPE (V8SI, V16HI, V16HI, V8SI, QI)
+DEF_FUNCTION_TYPE (V16SI, V32HI, V32HI, V16SI, HI)
DEF_FUNCTION_TYPE (V8SI, V8SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, INT, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, V16SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V16SI, INT, V8SI, QI)
DEF_FUNCTION_TYPE (V8SI, V8SI, SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, INT)
@@ -451,9 +547,17 @@ DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI)
DEF_FUNCTION_TYPE (V16SI, V8DF, V8DF)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, INT, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, INT, V8DI, DI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, INT, V4DI, SI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT, V2DI, HI)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V4DI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI, INT, V8DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V4DI, INT, V8DI, QI)
DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V4UDI, V8USI, V8USI)
+DEF_FUNCTION_TYPE (V8DI, V16SI, V16SI)
+DEF_FUNCTION_TYPE (V8DI, V64QI, V64QI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI)
DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI)
@@ -462,8 +566,12 @@ DEF_FUNCTION_TYPE (V4DI, V8DI, INT, V4DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V2DI, V8DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8DI, INT, V8DI, QI)
DEF_FUNCTION_TYPE (V4DI, V4DI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, INT, V4DI, QI)
DEF_FUNCTION_TYPE (V2DI, V4DI, INT)
DEF_FUNCTION_TYPE (VOID, PVOID, INT64)
+DEF_FUNCTION_TYPE (V2DI, V4DI, INT, V2DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V8DI, INT)
+DEF_FUNCTION_TYPE (V2DI, V8DI, INT, V2DI, QI)
DEF_FUNCTION_TYPE (VOID, PCHAR, V16QI)
DEF_FUNCTION_TYPE (VOID, PCHAR, V32QI)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF)
@@ -485,9 +593,72 @@ DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED)
DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI)
# Instructions returning mask
+DEF_FUNCTION_TYPE (QI, QI)
DEF_FUNCTION_TYPE (HI, HI)
+DEF_FUNCTION_TYPE (SI, SI)
+DEF_FUNCTION_TYPE (DI, DI)
+DEF_FUNCTION_TYPE (HI, V16QI)
+DEF_FUNCTION_TYPE (SI, V32QI)
+DEF_FUNCTION_TYPE (DI, V64QI)
+DEF_FUNCTION_TYPE (QI, V8HI)
+DEF_FUNCTION_TYPE (HI, V16HI)
+DEF_FUNCTION_TYPE (SI, V32HI)
+DEF_FUNCTION_TYPE (QI, V4SI)
+DEF_FUNCTION_TYPE (QI, V8SI)
+DEF_FUNCTION_TYPE (HI, V16SI)
+DEF_FUNCTION_TYPE (QI, V2DI)
+DEF_FUNCTION_TYPE (QI, V4DI)
+DEF_FUNCTION_TYPE (QI, V8DI)
+DEF_FUNCTION_TYPE (V16QI, HI)
+DEF_FUNCTION_TYPE (V32QI, SI)
+DEF_FUNCTION_TYPE (V64QI, DI)
+DEF_FUNCTION_TYPE (V8HI, QI)
+DEF_FUNCTION_TYPE (V16HI, HI)
+DEF_FUNCTION_TYPE (V32HI, SI)
+DEF_FUNCTION_TYPE (V4SI, QI)
+DEF_FUNCTION_TYPE (V4SI, HI)
+DEF_FUNCTION_TYPE (V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, HI)
+DEF_FUNCTION_TYPE (V2DI, QI)
+DEF_FUNCTION_TYPE (V4DI, QI)
+DEF_FUNCTION_TYPE (QI, QI, QI)
DEF_FUNCTION_TYPE (HI, HI, HI)
+DEF_FUNCTION_TYPE (SI, SI, SI)
+DEF_FUNCTION_TYPE (DI, DI, DI)
+DEF_FUNCTION_TYPE (QI, QI, INT)
DEF_FUNCTION_TYPE (HI, HI, INT)
+DEF_FUNCTION_TYPE (SI, SI, INT)
+DEF_FUNCTION_TYPE (DI, DI, INT)
+DEF_FUNCTION_TYPE (HI, V16QI, V16QI)
+DEF_FUNCTION_TYPE (HI, V16QI, V16QI, HI)
+DEF_FUNCTION_TYPE (HI, V16QI, V16QI, INT, HI)
+DEF_FUNCTION_TYPE (SI, V32QI, V32QI)
+DEF_FUNCTION_TYPE (SI, V32QI, V32QI, SI)
+DEF_FUNCTION_TYPE (SI, V32QI, V32QI, INT, SI)
+DEF_FUNCTION_TYPE (DI, V64QI, V64QI)
+DEF_FUNCTION_TYPE (DI, V64QI, V64QI, DI)
+DEF_FUNCTION_TYPE (DI, V64QI, V64QI, INT, DI)
+DEF_FUNCTION_TYPE (QI, V8HI, V8HI)
+DEF_FUNCTION_TYPE (QI, V8HI, V8HI, QI)
+DEF_FUNCTION_TYPE (QI, V8HI, V8HI, INT, QI)
+DEF_FUNCTION_TYPE (HI, V16HI, V16HI)
+DEF_FUNCTION_TYPE (HI, V16HI, V16HI, HI)
+DEF_FUNCTION_TYPE (HI, V16HI, V16HI, INT, HI)
+DEF_FUNCTION_TYPE (SI, V32HI, V32HI)
+DEF_FUNCTION_TYPE (SI, V32HI, V32HI, SI)
+DEF_FUNCTION_TYPE (SI, V32HI, V32HI, INT, SI)
+DEF_FUNCTION_TYPE (QI, V4SI, V4SI)
+DEF_FUNCTION_TYPE (QI, V4SI, V4SI, QI)
+DEF_FUNCTION_TYPE (QI, V4SI, V4SI, INT, QI)
+DEF_FUNCTION_TYPE (QI, V8SI, V8SI)
+DEF_FUNCTION_TYPE (QI, V8SI, V8SI, QI)
+DEF_FUNCTION_TYPE (QI, V8SI, V8SI, INT, QI)
+DEF_FUNCTION_TYPE (QI, V2DI, V2DI)
+DEF_FUNCTION_TYPE (QI, V2DI, V2DI, QI)
+DEF_FUNCTION_TYPE (QI, V2DI, V2DI, INT, QI)
+DEF_FUNCTION_TYPE (QI, V4DI, V4DI)
+DEF_FUNCTION_TYPE (QI, V4DI, V4DI, QI)
+DEF_FUNCTION_TYPE (QI, V4DI, V4DI, INT, QI)
DEF_FUNCTION_TYPE (QI, V8DI, V8DI)
DEF_FUNCTION_TYPE (QI, V8DI, V8DI, QI)
DEF_FUNCTION_TYPE (HI, V16SI, V16SI)
@@ -514,6 +685,7 @@ DEF_FUNCTION_TYPE (V8DI, QI)
DEF_FUNCTION_TYPE (INT, V16QI, V16QI, INT)
DEF_FUNCTION_TYPE (UCHAR, UINT, UINT, UINT)
DEF_FUNCTION_TYPE (UCHAR, UINT64, UINT, UINT)
+DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI)
DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, V16HI)
DEF_FUNCTION_TYPE (V16QI, V16QI, QI, INT)
DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, INT)
@@ -521,7 +693,10 @@ DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, V16QI)
DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF)
+DEF_FUNCTION_TYPE (V2DF, V2DI, V2DF, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DI, V2DF, QI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, DI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, UINT, UINT)
@@ -529,9 +704,11 @@ DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI)
DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, V32QI)
DEF_FUNCTION_TYPE (V4DF, V4DF, V2DF, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V2DF, INT, V4DF, QI)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, INT)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DF)
DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, INT)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V4HI, V4HI, HI, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, FLOAT, INT)
@@ -540,6 +717,10 @@ DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF)
+DEF_FUNCTION_TYPE (V4SF, V4SI, V4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SI, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT)
DEF_FUNCTION_TYPE (V4SI, V4SI, SI, INT)
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT)
@@ -550,76 +731,263 @@ DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, INT)
DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V4SI)
DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V4SF, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V4SF, INT, V8SF, QI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SF)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF)
DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, INT, V8SI, QI)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, INT)
DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI, INT, V4DI, QI)
# Instructions with masking
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V4SF, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V4SI, V2DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4SF, V4DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4SI, V4DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8SF, V8DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8SI, V8DF, QI)
+DEF_FUNCTION_TYPE (V2DI, V4SI, V2DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V8HI, V2DI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8DF, V8DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DF, V4DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V2DF, V2DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT, V2DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, INT, V4DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8SI, V8DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8HI, V8DI, QI)
DEF_FUNCTION_TYPE (V8DI, V16QI, V8DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V16QI, V2DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V16QI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4SI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V8HI, V4DI, QI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, QI)
DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, V8DF)
DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, V8DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DI, V8DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DI, V4DF, V4DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI, V4DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DF, QI)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, V16QI, HI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, V16HI, HI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, HI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI)
DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V64QI, V64QI, V64QI, V64QI, DI)
+DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, V32QI, SI)
+DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SI, V4SF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SI, V8SF, QI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, HI)
DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, HI)
+DEF_FUNCTION_TYPE (V4SI, V16QI, V4SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V8HI, V4SI, QI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, V8HI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, V16QI, V8SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT, V4SI, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SI, V8SF, V8SF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI, V8SF, QI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, INT, V8SI, QI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, HI)
DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, V16SF)
DEF_FUNCTION_TYPE (V16SF, V16SI, V16SF, V16SF, HI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI, V16SF, HI)
-DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI)
-DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V16SF, V8SF, V16SF, HI)
DEF_FUNCTION_TYPE (V16SF, V4SF, V16SF, HI)
DEF_FUNCTION_TYPE (V8DF, V4DF, V8DF, QI)
DEF_FUNCTION_TYPE (V8DF, V2DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V16SI, V8SI, V16SI, HI)
DEF_FUNCTION_TYPE (V16SI, V4SI, V16SI, HI)
DEF_FUNCTION_TYPE (V16SI, SI, V16SI, HI)
DEF_FUNCTION_TYPE (V16SI, V16HI, V16SI, HI)
DEF_FUNCTION_TYPE (V16SI, V16QI, V16SI, HI)
DEF_FUNCTION_TYPE (V8SI, V8DF, V8SI, QI)
DEF_FUNCTION_TYPE (V8DI, V4DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4DF, V4SI, QI)
DEF_FUNCTION_TYPE (V8DI, V2DI, V8DI, QI)
DEF_FUNCTION_TYPE (V8DI, DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8DI, PCCHAR, V8DI, QI)
+DEF_FUNCTION_TYPE (V8SF, PCFLOAT, V8SF, QI)
+DEF_FUNCTION_TYPE (V4SF, PCFLOAT, V4SF, QI)
+DEF_FUNCTION_TYPE (V4DF, PCDOUBLE, V4DF, QI)
+DEF_FUNCTION_TYPE (V2DF, PCDOUBLE, V2DF, QI)
+DEF_FUNCTION_TYPE (V8SI, PCCHAR, V8SI, QI)
+DEF_FUNCTION_TYPE (V4SI, PCCHAR, V4SI, QI)
+DEF_FUNCTION_TYPE (V4DI, PCCHAR, V4DI, QI)
+DEF_FUNCTION_TYPE (V2DI, PCCHAR, V2DI, QI)
+DEF_FUNCTION_TYPE (V16QI, V16SI, V16QI, HI)
+DEF_FUNCTION_TYPE (V16QI, V8DI, V16QI, QI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V32HI, V64QI, V64QI, INT)
+DEF_FUNCTION_TYPE (V32HI, V32QI, V32HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, HI)
+DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI, INT)
+DEF_FUNCTION_TYPE (V16HI, V16QI, V16HI, HI)
+DEF_FUNCTION_TYPE (V8HI, V16QI, V8HI, QI)
+DEF_FUNCTION_TYPE (V8HI, V16QI, V16QI, INT)
+DEF_FUNCTION_TYPE (V8SF, V4SF, V8SF, QI)
+DEF_FUNCTION_TYPE (V4DF, V2DF, V4DF, QI)
+DEF_FUNCTION_TYPE (V8SI, V4SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, QI)
+DEF_FUNCTION_TYPE (V4SI, SI, V4SI, QI)
+DEF_FUNCTION_TYPE (V4DI, V2DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, QI)
+DEF_FUNCTION_TYPE (V2DI, DI, V2DI, QI)
+DEF_FUNCTION_TYPE (V64QI, V64QI, V64QI, DI)
+DEF_FUNCTION_TYPE (V64QI, V16QI, V64QI, DI)
+DEF_FUNCTION_TYPE (V64QI, QI, V64QI, DI)
+DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, SI)
+DEF_FUNCTION_TYPE (V32QI, V16QI, V32QI, SI)
+DEF_FUNCTION_TYPE (V32QI, QI, V32QI, SI)
+DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, HI)
+DEF_FUNCTION_TYPE (V16QI, QI, V16QI, HI)
+DEF_FUNCTION_TYPE (V32HI, V8HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V32HI, HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V8HI, V16HI, HI)
+DEF_FUNCTION_TYPE (V16HI, HI, V16HI, HI)
+DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, QI)
+DEF_FUNCTION_TYPE (V8HI, HI, V8HI, QI)
+DEF_FUNCTION_TYPE (V64QI, PCV64QI, V64QI, DI)
+DEF_FUNCTION_TYPE (V32HI, PCV32HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V32QI, PCV32QI, V32QI, SI)
DEF_FUNCTION_TYPE (V16SF, PCV16SF, V16SF, HI)
+DEF_FUNCTION_TYPE (V16SF, PCV8SF, V16SF, HI)
DEF_FUNCTION_TYPE (V8DF, PCV8DF, V8DF, QI)
DEF_FUNCTION_TYPE (V16SI, PCV16SI, V16SI, HI)
+DEF_FUNCTION_TYPE (V16SI, PCV8SI, V16SI, HI)
+DEF_FUNCTION_TYPE (V16HI, PCV16HI, V16HI, HI)
+DEF_FUNCTION_TYPE (V16QI, PCV16QI, V16QI, HI)
+DEF_FUNCTION_TYPE (V8DF, PCV2DF, V8DF, QI)
+DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SF, QI)
+DEF_FUNCTION_TYPE (V8SF, PCV4SF, V8SF, QI)
DEF_FUNCTION_TYPE (V8DI, PCV8DI, V8DI, QI)
-DEF_FUNCTION_TYPE (V2DF, PCDOUBLE, V2DF, QI)
-DEF_FUNCTION_TYPE (V4SF, PCFLOAT, V4SF, QI)
-DEF_FUNCTION_TYPE (V16QI, V16SI, V16QI, HI)
+DEF_FUNCTION_TYPE (V8DI, PCV2DI, V8DI, QI)
+DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8SI, PCV4SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V8HI, PCV8HI, V8HI, QI)
+DEF_FUNCTION_TYPE (V4DF, PCV2DF, V4DF, QI)
+DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DF, QI)
+DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SF, QI)
+DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4DI, PCV2DI, V4DI, QI)
+DEF_FUNCTION_TYPE (V4SI, PCV4SI, V4SI, QI)
+DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DF, QI)
+DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI, QI)
DEF_FUNCTION_TYPE (V16HI, V16SI, V16HI, HI)
DEF_FUNCTION_TYPE (V8SI, V8DI, V8SI, QI)
DEF_FUNCTION_TYPE (V8HI, V8DI, V8HI, QI)
-DEF_FUNCTION_TYPE (V16QI, V8DI, V16QI, QI)
+DEF_FUNCTION_TYPE (V16QI, V8HI, V16QI, QI)
+DEF_FUNCTION_TYPE (V16QI, V16HI, V16QI, HI)
+DEF_FUNCTION_TYPE (V16QI, V4SI, V16QI, QI)
+DEF_FUNCTION_TYPE (V16QI, V8SI, V16QI, QI)
+DEF_FUNCTION_TYPE (V8HI, V4SI, V8HI, QI)
+DEF_FUNCTION_TYPE (V8HI, V8SI, V8HI, QI)
+DEF_FUNCTION_TYPE (V16QI, V2DI, V16QI, QI)
+DEF_FUNCTION_TYPE (V16QI, V4DI, V16QI, QI)
+DEF_FUNCTION_TYPE (V8HI, V2DI, V8HI, QI)
+DEF_FUNCTION_TYPE (V8HI, V4DI, V8HI, QI)
+DEF_FUNCTION_TYPE (V4SI, V2DI, V4SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4DI, V4SI, QI)
+DEF_FUNCTION_TYPE (V32QI, V32HI, V32QI, SI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, INT, V2DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, INT, V4DF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, INT, V4SF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, INT, V8SF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, INT, V4DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, V2DF, QI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, INT, V8SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, V4SF, QI)
DEF_FUNCTION_TYPE (VOID, PV8DF, V8DF, QI)
DEF_FUNCTION_TYPE (VOID, PV8SI, V8DI, QI)
DEF_FUNCTION_TYPE (VOID, PV8HI, V8DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8HI, V4DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8HI, V2DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV4SI, V4DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV4SI, V2DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8HI, V8SI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8HI, V4SI, QI)
+DEF_FUNCTION_TYPE (VOID, PV4DF, V4DF, QI)
+DEF_FUNCTION_TYPE (VOID, PV2DF, V2DF, QI)
DEF_FUNCTION_TYPE (VOID, PV16SF, V16SF, HI)
+DEF_FUNCTION_TYPE (VOID, PV8SF, V8SF, QI)
+DEF_FUNCTION_TYPE (VOID, PV4SF, V4SF, QI)
DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV4DI, V4DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV2DI, V2DI, QI)
DEF_FUNCTION_TYPE (VOID, PV16SI, V16SI, HI)
DEF_FUNCTION_TYPE (VOID, PV16HI, V16SI, HI)
DEF_FUNCTION_TYPE (VOID, PV16QI, V16SI, HI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V8SI, QI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V4SI, QI)
DEF_FUNCTION_TYPE (VOID, PV16QI, V8DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V4DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V2DI, QI)
+DEF_FUNCTION_TYPE (VOID, PV8SI, V8SI, QI)
+DEF_FUNCTION_TYPE (VOID, PV4SI, V4SI, QI)
+DEF_FUNCTION_TYPE (VOID, PV32HI, V32HI, SI)
+DEF_FUNCTION_TYPE (VOID, PV16HI, V16HI, HI)
+DEF_FUNCTION_TYPE (VOID, PV8HI, V8HI, QI)
+DEF_FUNCTION_TYPE (VOID, PV64QI, V64QI, DI)
+DEF_FUNCTION_TYPE (VOID, PV32QI, V32QI, SI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V16QI, HI)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF, QI)
DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF, QI)
DEF_FUNCTION_TYPE (V16SI, V16SF, V16SI, HI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT, QI)
+DEF_FUNCTION_TYPE (V8SI, V8SF, V8SI, QI)
+DEF_FUNCTION_TYPE (V4SI, V4SF, V4SI, QI)
+DEF_FUNCTION_TYPE (V8DI, V8SF, V8DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4SF, V4DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V4SF, V2DI, QI)
+DEF_FUNCTION_TYPE (V8SF, V8DI, V8SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V4DI, V4SF, QI)
+DEF_FUNCTION_TYPE (V4SF, V2DI, V4SF, QI)
+DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, QI)
+DEF_FUNCTION_TYPE (V4DF, V4DI, V4DF, QI)
+DEF_FUNCTION_TYPE (V2DF, V2DI, V2DF, QI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, INT, V32HI, SI)
+DEF_FUNCTION_TYPE (V32HI, V32HI, V8HI, V32HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, INT, V16HI, HI)
+DEF_FUNCTION_TYPE (V16HI, V16HI, V8HI, V16HI, HI)
+DEF_FUNCTION_TYPE (V8HI, V8HI, INT, V8HI, QI)
+DEF_FUNCTION_TYPE (V32HI, V64QI, V64QI, INT, V32HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V32QI, V32QI, INT, V16HI, HI)
+DEF_FUNCTION_TYPE (V8HI, V16QI, V16QI, INT, V8HI, QI)
+DEF_FUNCTION_TYPE (V64QI, V32HI, V32HI, V64QI, DI)
+DEF_FUNCTION_TYPE (V32QI, V16HI, V16HI, V32QI, SI)
+DEF_FUNCTION_TYPE (V16QI, V8HI, V8HI, V16QI, HI)
+DEF_FUNCTION_TYPE (V32HI, V16SI, V16SI, V32HI, SI)
+DEF_FUNCTION_TYPE (V16HI, V8SI, V8SI, V16HI, HI)
+DEF_FUNCTION_TYPE (V8HI, V4SI, V4SI, V8HI, QI)
+DEF_FUNCTION_TYPE (V8DI, V16SI, V16SI, V8DI, QI)
+DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI, V4DI, QI)
+DEF_FUNCTION_TYPE (V2DI, V4SI, V4SI, V2DI, QI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT, HI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI, INT, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, INT, QI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI, INT, QI)
+DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT, QI)
DEF_FUNCTION_TYPE (VOID, PCVOID, UNSIGNED, UNSIGNED)
DEF_FUNCTION_TYPE (VOID, PV2DF, V2DI, V2DF)
@@ -644,11 +1012,30 @@ DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V8SF, V8SI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI)
+DEF_FUNCTION_TYPE (V2DF, V2DF, V2DI, V2DF)
+DEF_FUNCTION_TYPE (V4DF, V4DF, V4DI, V4DF)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DI, V8DF)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V4SI, V4SF)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V8SI, V8SF)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SI, V16SF)
-DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, V4SF, QI)
-DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, V2DF, QI)
-DEF_FUNCTION_TYPE (V8DI, V16SI, V16SI, V8DI, QI)
+
+DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI, V8SI, QI)
+DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI, V4DI, QI)
+
+DEF_FUNCTION_TYPE (QI, V8DF, INT)
+DEF_FUNCTION_TYPE (QI, V4DF, INT)
+DEF_FUNCTION_TYPE (QI, V4DF, V4DF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V2DF, INT)
+DEF_FUNCTION_TYPE (HI, V16SF, INT)
+DEF_FUNCTION_TYPE (QI, V8SF, INT)
+DEF_FUNCTION_TYPE (QI, V8SF, V8SF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V4SF, INT)
+DEF_FUNCTION_TYPE (QI, V8DF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V4DF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V2DF, INT, QI)
+DEF_FUNCTION_TYPE (HI, V16SF, INT, HI)
+DEF_FUNCTION_TYPE (QI, V8SF, INT, QI)
+DEF_FUNCTION_TYPE (QI, V4SF, INT, QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT)
DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI)
@@ -684,6 +1071,10 @@ DEF_FUNCTION_TYPE (V8SI, V8DF, V8SI, QI, INT)
DEF_FUNCTION_TYPE (V8SF, V8DF, V8SF, QI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, QI, INT)
DEF_FUNCTION_TYPE (V8DF, V8SF, V8DF, QI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8DF, V8DI, QI, INT)
+DEF_FUNCTION_TYPE (V8DI, V8SF, V8DI, QI, INT)
+DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, QI, INT)
+DEF_FUNCTION_TYPE (V8SF, V8DI, V8SF, QI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, HI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF, QI, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI, INT)
@@ -732,16 +1123,53 @@ DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V8SI, QI, INT)
DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V16SI, QI, INT)
DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V8DI, QI, INT)
DEF_FUNCTION_TYPE (V8DI, V8DI, PCINT64, V8DI, QI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V2DF, V2DF, PCDOUBLE, V2DI, QI, INT)
+DEF_FUNCTION_TYPE (V4DF, V4DF, PCDOUBLE, V4DI, QI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, PCFLOAT, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V2DI, QI, INT)
+DEF_FUNCTION_TYPE (V4SF, V4SF, PCFLOAT, V4DI, QI, INT)
+DEF_FUNCTION_TYPE (V8SF, V8SF, PCFLOAT, V4DI, QI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, PCINT64, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V2DI, V2DI, PCINT64, V2DI, QI, INT)
+DEF_FUNCTION_TYPE (V4DI, V4DI, PCINT64, V4DI, QI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V4SI, QI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V8SI, QI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V2DI, QI, INT)
+DEF_FUNCTION_TYPE (V4SI, V4SI, PCINT, V4DI, QI, INT)
+DEF_FUNCTION_TYPE (V8SI, V8SI, PCINT, V4DI, QI, INT)
+
DEF_FUNCTION_TYPE (VOID, PFLOAT, HI, V16SI, V16SF, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V8SI, V8SF, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V4SI, V4SF, INT)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V8SI, V8DF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V4SI, V4DF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V4SI, V2DF, INT)
DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V8DI, V8SF, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V4DI, V4SF, INT)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, QI, V2DI, V4SF, INT)
DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V8DI, V8DF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V4DI, V4DF, INT)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, QI, V2DI, V2DF, INT)
DEF_FUNCTION_TYPE (VOID, PINT, HI, V16SI, V16SI, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V8SI, V8SI, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V4SI, V4SI, INT)
DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8SI, V8DI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4SI, V4DI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4SI, V2DI, INT)
DEF_FUNCTION_TYPE (VOID, PINT, QI, V8DI, V8SI, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V4DI, V4SI, INT)
+DEF_FUNCTION_TYPE (VOID, PINT, QI, V2DI, V4SI, INT)
DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCINT64, INT, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V4DI, V4DI, INT)
+DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V2DI, V2DI, INT)
DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCINT, INT, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT64, INT, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT, INT, INT)
@@ -789,8 +1217,13 @@ DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_V2DI, COUNT)
DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF_V2DF, SWAP)
DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF_V4SF, SWAP)
+DEF_FUNCTION_TYPE_ALIAS (V8DI_FTYPE_V8DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_INT, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V8DI_FTYPE_V8DI_V8DI_INT, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI, CONVERT)
+DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V4DI_FTYPE_V4DI_V4DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V2DI_FTYPE_V2DI_V2DI_INT, CONVERT)
DEF_FUNCTION_TYPE_ALIAS (V1DI_FTYPE_V1DI_V1DI_INT, CONVERT)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 2c05cec0c4b..a3858edb240 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -345,6 +345,12 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__AVX512CD__");
if (isa_flag & OPTION_MASK_ISA_AVX512PF)
def_or_undef (parse_in, "__AVX512PF__");
+ if (isa_flag & OPTION_MASK_ISA_AVX512DQ)
+ def_or_undef (parse_in, "__AVX512DQ__");
+ if (isa_flag & OPTION_MASK_ISA_AVX512BW)
+ def_or_undef (parse_in, "__AVX512BW__");
+ if (isa_flag & OPTION_MASK_ISA_AVX512VL)
+ def_or_undef (parse_in, "__AVX512VL__");
if (isa_flag & OPTION_MASK_ISA_FMA)
def_or_undef (parse_in, "__FMA__");
if (isa_flag & OPTION_MASK_ISA_RTM)
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 07e572058cc..c24abe6fea7 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -86,6 +86,9 @@ VECTOR_MODE (INT, TI, 1); /* V1TI */
VECTOR_MODE (INT, DI, 1); /* V1DI */
VECTOR_MODE (INT, SI, 1); /* V1SI */
VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODE (INT, QI, 12); /* V12QI */
+VECTOR_MODE (INT, QI, 14); /* V14QI */
+VECTOR_MODE (INT, HI, 6); /* V6HI */
INT_MODE (OI, 32);
INT_MODE (XI, 64);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 542945f1176..b385bd00179 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2593,6 +2593,9 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
{ "-mavx512er", OPTION_MASK_ISA_AVX512ER },
{ "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
{ "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
+ { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
+ { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
+ { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
{ "-msse4a", OPTION_MASK_ISA_SSE4A },
{ "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
{ "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
@@ -3123,6 +3126,9 @@ ix86_option_override_internal (bool main_args_p,
#define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
#define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
#define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
+#define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
+#define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
+#define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
#define PTA_CORE2 \
(PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
@@ -3689,6 +3695,15 @@ ix86_option_override_internal (bool main_args_p,
if (processor_alias_table[i].flags & PTA_XSAVES
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
+ if (processor_alias_table[i].flags & PTA_AVX512DQ
+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
+ if (processor_alias_table[i].flags & PTA_AVX512BW
+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
+ if (processor_alias_table[i].flags & PTA_AVX512VL
+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
@@ -4545,6 +4560,9 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
+ IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
+ IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
+ IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
IX86_ATTR_ISA ("mmx", OPT_mmmx),
IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
@@ -8983,19 +9001,24 @@ standard_sse_constant_opcode (rtx insn, rtx x)
switch (get_attr_mode (insn))
{
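+    /* With AVX512VL the extended registers xmm16-xmm31 and
+       ymm16-ymm31 become usable, but only through EVEX-encoded
+       instructions, so prefer the EVEX-encodable vpxord over the
+       VEX-only vpxor.  vxorps/vxorpd likewise gain EVEX forms only
+       with AVX512DQ.  */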
case MODE_XI:
- case MODE_V16SF:
return "vpxord\t%g0, %g0, %g0";
+ case MODE_V16SF:
+ return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
+ : "vpxord\t%g0, %g0, %g0";
case MODE_V8DF:
- return "vpxorq\t%g0, %g0, %g0";
+ return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
+ : "vpxorq\t%g0, %g0, %g0";
case MODE_TI:
- return "%vpxor\t%0, %d0";
+ return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
+ : "%vpxor\t%0, %d0";
case MODE_V2DF:
return "%vxorpd\t%0, %d0";
case MODE_V4SF:
return "%vxorps\t%0, %d0";
case MODE_OI:
- return "vpxor\t%x0, %x0, %x0";
+ return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
+ : "vpxor\t%x0, %x0, %x0";
case MODE_V4DF:
return "vxorpd\t%x0, %x0, %x0";
case MODE_V8SF:
@@ -9006,7 +9029,8 @@ standard_sse_constant_opcode (rtx insn, rtx x)
}
case 2:
- if (get_attr_mode (insn) == MODE_XI
+      if (TARGET_AVX512VL
+	  || get_attr_mode (insn) == MODE_XI
|| get_attr_mode (insn) == MODE_V8DF
|| get_attr_mode (insn) == MODE_V16SF)
return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
@@ -14688,7 +14712,7 @@ print_reg (rtx x, int code, FILE *file)
case 8:
case 4:
case 12:
- if (! ANY_FP_REG_P (x))
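+      /* Mask registers %k0-%k7 have no 'e'/'r'-prefixed names.  */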
+ if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
/* FALLTHRU */
case 16:
@@ -17517,8 +17541,10 @@ ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
{
case V4SFmode:
case V8SFmode:
+ case V16SFmode:
case V2DFmode:
case V4DFmode:
+ case V8DFmode:
dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
if (GET_CODE (op2) == CONST_VECTOR)
{
@@ -18825,6 +18851,19 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
enum machine_mode fltmode = GET_MODE (target);
rtx (*cvt) (rtx, rtx);
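+
+  /* vcvtudq2ps converts unsigned elements directly: AVX512F provides
+     the 512-bit form and AVX512VL the 128-bit and 256-bit forms, so
+     the signed-conversion workaround below is needed only when
+     neither applies.  */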
+ if (intmode == V16SImode)
+ {
+ emit_insn (gen_ufloatv16siv16sf2 (target, val));
+ return;
+ }
+ if (TARGET_AVX512VL)
+ {
+ if (intmode == V4SImode)
+ emit_insn (gen_ufloatv4siv4sf2 (target, val));
+ else
+ emit_insn (gen_ufloatv8siv8sf2 (target, val));
+ return;
+ }
if (intmode == V4SImode)
cvt = gen_floatv4siv4sf2;
else
@@ -21013,6 +21052,12 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
}
break;
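+      /* Byte and word element blends require the AVX512BW masked
+	 blend patterns (vpblendmb/vpblendmw).  */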
+ case V64QImode:
+ gen = gen_avx512bw_blendmv64qi;
+ break;
+ case V32HImode:
+ gen = gen_avx512bw_blendmv32hi;
+ break;
case V16SImode:
gen = gen_avx512f_blendmv16si;
break;
@@ -21329,6 +21374,8 @@ ix86_expand_int_vcond (rtx operands[])
}
break;
+ case V64QImode:
+ case V32HImode:
case V32QImode:
case V16HImode:
case V16QImode:
@@ -21377,20 +21424,113 @@ ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
enum machine_mode mode = GET_MODE (op0);
switch (mode)
{
+      /* There is no byte version of vpermi2; emulate it with vpermi2w.  */
+ case V64QImode:
+ if (!TARGET_AVX512BW)
+ return false;
+ rtx mask_lowpart, op0_lowpart, op1_lowpart;
+ rtx perm_lo, perm_hi, tmp, res_lo, tmp2, res_hi;
+
+ mask_lowpart = gen_lowpart (V32HImode, force_reg (V64QImode, mask));
+ op0_lowpart = gen_lowpart (V32HImode, op0);
+ op1_lowpart = gen_lowpart (V32HImode, op1);
+ tmp = gen_reg_rtx (V32HImode);
+ tmp2 = gen_reg_rtx (V32HImode);
+ perm_lo = gen_reg_rtx (V32HImode);
+ perm_hi = gen_reg_rtx (V32HImode);
+ res_lo = gen_reg_rtx (V32HImode);
+ res_hi = gen_reg_rtx (V32HImode);
+
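+      /* Halve each byte selector to form a word selector: shifting a
+	 mask word left by 8 and arithmetically right by 9 yields the
+	 even-position byte index divided by two (perm_lo), while
+	 shifting right by 9 yields the odd-position byte index
+	 divided by two (perm_hi).  Two vpermi2w permutes fetch the
+	 selected words; the high result is shifted back into the odd
+	 byte positions and merged with the low result by a byte
+	 blend under the constant mask 0xAAAA...AA.  */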
+ emit_insn (gen_ashlv32hi3 (tmp, mask_lowpart, GEN_INT (8)));
+ emit_insn (gen_ashrv32hi3 (perm_lo, tmp, GEN_INT (9)));
+ emit_insn (gen_ashrv32hi3 (perm_hi, mask_lowpart, GEN_INT (9)));
+ emit_insn (gen_avx512bw_vpermi2varv32hi3 (res_lo, op0_lowpart,
+ perm_lo, op1_lowpart));
+ emit_insn (gen_avx512bw_vpermi2varv32hi3 (tmp2, op0_lowpart,
+ perm_hi, op1_lowpart));
+ emit_insn (gen_ashlv32hi3 (res_hi, tmp2, GEN_INT (8)));
+      emit_insn (gen_avx512bw_blendmv64qi (target,
+					   gen_lowpart (V64QImode, res_lo),
+					   gen_lowpart (V64QImode, res_hi),
+					   force_reg (DImode,
+						      GEN_INT (0xAAAAAAAAAAAAAAAALL))));
+ return true;
+ case V8HImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv8hi3 (target, op0,
+ force_reg (V8HImode, mask), op1));
+ return true;
+ case V16HImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv16hi3 (target, op0,
+ force_reg (V16HImode, mask), op1));
+ return true;
+ case V32HImode:
+ emit_insn (gen_avx512bw_vpermi2varv32hi3 (target, op0,
+ force_reg (V32HImode, mask), op1));
+ return true;
+ case V4SImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv4si3 (target, op0,
+ force_reg (V4SImode, mask), op1));
+ return true;
+ case V8SImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv8si3 (target, op0,
+ force_reg (V8SImode, mask), op1));
+ return true;
case V16SImode:
emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
force_reg (V16SImode, mask),
op1));
return true;
+ case V4SFmode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv4sf3 (target, op0,
+ force_reg (V4SImode, mask), op1));
+ return true;
+ case V8SFmode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv8sf3 (target, op0,
+ force_reg (V8SImode, mask), op1));
+ return true;
case V16SFmode:
emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
force_reg (V16SImode, mask),
op1));
return true;
+ case V2DImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv2di3 (target, op0,
+ force_reg (V2DImode, mask), op1));
+ return true;
+ case V4DImode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv4di3 (target, op0,
+ force_reg (V4DImode, mask), op1));
+ return true;
case V8DImode:
emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
force_reg (V8DImode, mask), op1));
return true;
+ case V2DFmode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv2df3 (target, op0,
+ force_reg (V2DImode, mask), op1));
+ return true;
+ case V4DFmode:
+ if (!TARGET_AVX512VL)
+ return false;
+ emit_insn (gen_avx512vl_vpermi2varv4df3 (target, op0,
+ force_reg (V4DImode, mask), op1));
+ return true;
case V8DFmode:
emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
force_reg (V8DImode, mask), op1));
@@ -21792,6 +21932,15 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
switch (imode)
{
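+    /* For 512-bit byte vectors, extract the requested 256-bit half
+       and widen it to words with the AVX512BW byte extensions.  */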
+ case V64QImode:
+ if (unsigned_p)
+ unpack = gen_avx512bw_zero_extendv32qiv32hi2;
+ else
+ unpack = gen_avx512bw_sign_extendv32qiv32hi2;
+ halfmode = V32QImode;
+ extract
+ = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
+ break;
case V32QImode:
if (unsigned_p)
unpack = gen_avx2_zero_extendv16qiv16hi2;
@@ -28482,6 +28631,999 @@ enum ix86_builtins
IX86_BUILTIN_KXOR16,
IX86_BUILTIN_KMOV16,
+ /* AVX512VL. */
+ IX86_BUILTIN_PMOVUSQD256_MEM,
+ IX86_BUILTIN_PMOVUSQD128_MEM,
+ IX86_BUILTIN_PMOVSQD256_MEM,
+ IX86_BUILTIN_PMOVSQD128_MEM,
+ IX86_BUILTIN_PMOVQD256_MEM,
+ IX86_BUILTIN_PMOVQD128_MEM,
+ IX86_BUILTIN_PMOVUSQW256_MEM,
+ IX86_BUILTIN_PMOVUSQW128_MEM,
+ IX86_BUILTIN_PMOVSQW256_MEM,
+ IX86_BUILTIN_PMOVSQW128_MEM,
+ IX86_BUILTIN_PMOVQW256_MEM,
+ IX86_BUILTIN_PMOVQW128_MEM,
+ IX86_BUILTIN_PMOVUSQB256_MEM,
+ IX86_BUILTIN_PMOVUSQB128_MEM,
+ IX86_BUILTIN_PMOVSQB256_MEM,
+ IX86_BUILTIN_PMOVSQB128_MEM,
+ IX86_BUILTIN_PMOVQB256_MEM,
+ IX86_BUILTIN_PMOVQB128_MEM,
+ IX86_BUILTIN_PMOVUSDW256_MEM,
+ IX86_BUILTIN_PMOVUSDW128_MEM,
+ IX86_BUILTIN_PMOVSDW256_MEM,
+ IX86_BUILTIN_PMOVSDW128_MEM,
+ IX86_BUILTIN_PMOVDW256_MEM,
+ IX86_BUILTIN_PMOVDW128_MEM,
+ IX86_BUILTIN_PMOVUSDB256_MEM,
+ IX86_BUILTIN_PMOVUSDB128_MEM,
+ IX86_BUILTIN_PMOVSDB256_MEM,
+ IX86_BUILTIN_PMOVSDB128_MEM,
+ IX86_BUILTIN_PMOVDB256_MEM,
+ IX86_BUILTIN_PMOVDB128_MEM,
+ IX86_BUILTIN_MOVDQA64LOAD256_MASK,
+ IX86_BUILTIN_MOVDQA64LOAD128_MASK,
+ IX86_BUILTIN_MOVDQA32LOAD256_MASK,
+ IX86_BUILTIN_MOVDQA32LOAD128_MASK,
+ IX86_BUILTIN_MOVDQA64STORE256_MASK,
+ IX86_BUILTIN_MOVDQA64STORE128_MASK,
+ IX86_BUILTIN_MOVDQA32STORE256_MASK,
+ IX86_BUILTIN_MOVDQA32STORE128_MASK,
+ IX86_BUILTIN_LOADAPD256_MASK,
+ IX86_BUILTIN_LOADAPD128_MASK,
+ IX86_BUILTIN_LOADAPS256_MASK,
+ IX86_BUILTIN_LOADAPS128_MASK,
+ IX86_BUILTIN_STOREAPD256_MASK,
+ IX86_BUILTIN_STOREAPD128_MASK,
+ IX86_BUILTIN_STOREAPS256_MASK,
+ IX86_BUILTIN_STOREAPS128_MASK,
+ IX86_BUILTIN_LOADUPD256_MASK,
+ IX86_BUILTIN_LOADUPD128_MASK,
+ IX86_BUILTIN_LOADUPS256_MASK,
+ IX86_BUILTIN_LOADUPS128_MASK,
+ IX86_BUILTIN_STOREUPD256_MASK,
+ IX86_BUILTIN_STOREUPD128_MASK,
+ IX86_BUILTIN_STOREUPS256_MASK,
+ IX86_BUILTIN_STOREUPS128_MASK,
+ IX86_BUILTIN_LOADDQUDI256_MASK,
+ IX86_BUILTIN_LOADDQUDI128_MASK,
+ IX86_BUILTIN_LOADDQUSI256_MASK,
+ IX86_BUILTIN_LOADDQUSI128_MASK,
+ IX86_BUILTIN_LOADDQUHI256_MASK,
+ IX86_BUILTIN_LOADDQUHI128_MASK,
+ IX86_BUILTIN_LOADDQUQI256_MASK,
+ IX86_BUILTIN_LOADDQUQI128_MASK,
+ IX86_BUILTIN_STOREDQUDI256_MASK,
+ IX86_BUILTIN_STOREDQUDI128_MASK,
+ IX86_BUILTIN_STOREDQUSI256_MASK,
+ IX86_BUILTIN_STOREDQUSI128_MASK,
+ IX86_BUILTIN_STOREDQUHI256_MASK,
+ IX86_BUILTIN_STOREDQUHI128_MASK,
+ IX86_BUILTIN_STOREDQUQI256_MASK,
+ IX86_BUILTIN_STOREDQUQI128_MASK,
+ IX86_BUILTIN_COMPRESSPDSTORE256,
+ IX86_BUILTIN_COMPRESSPDSTORE128,
+ IX86_BUILTIN_COMPRESSPSSTORE256,
+ IX86_BUILTIN_COMPRESSPSSTORE128,
+ IX86_BUILTIN_PCOMPRESSQSTORE256,
+ IX86_BUILTIN_PCOMPRESSQSTORE128,
+ IX86_BUILTIN_PCOMPRESSDSTORE256,
+ IX86_BUILTIN_PCOMPRESSDSTORE128,
+ IX86_BUILTIN_EXPANDPDLOAD256,
+ IX86_BUILTIN_EXPANDPDLOAD128,
+ IX86_BUILTIN_EXPANDPSLOAD256,
+ IX86_BUILTIN_EXPANDPSLOAD128,
+ IX86_BUILTIN_PEXPANDQLOAD256,
+ IX86_BUILTIN_PEXPANDQLOAD128,
+ IX86_BUILTIN_PEXPANDDLOAD256,
+ IX86_BUILTIN_PEXPANDDLOAD128,
+ IX86_BUILTIN_EXPANDPDLOAD256Z,
+ IX86_BUILTIN_EXPANDPDLOAD128Z,
+ IX86_BUILTIN_EXPANDPSLOAD256Z,
+ IX86_BUILTIN_EXPANDPSLOAD128Z,
+ IX86_BUILTIN_PEXPANDQLOAD256Z,
+ IX86_BUILTIN_PEXPANDQLOAD128Z,
+ IX86_BUILTIN_PEXPANDDLOAD256Z,
+ IX86_BUILTIN_PEXPANDDLOAD128Z,
+ IX86_BUILTIN_PALIGNR256_MASK,
+ IX86_BUILTIN_PALIGNR128_MASK,
+ IX86_BUILTIN_MOVDQA64_256_MASK,
+ IX86_BUILTIN_MOVDQA64_128_MASK,
+ IX86_BUILTIN_MOVDQA32_256_MASK,
+ IX86_BUILTIN_MOVDQA32_128_MASK,
+ IX86_BUILTIN_MOVAPD256_MASK,
+ IX86_BUILTIN_MOVAPD128_MASK,
+ IX86_BUILTIN_MOVAPS256_MASK,
+ IX86_BUILTIN_MOVAPS128_MASK,
+ IX86_BUILTIN_MOVDQUHI256_MASK,
+ IX86_BUILTIN_MOVDQUHI128_MASK,
+ IX86_BUILTIN_MOVDQUQI256_MASK,
+ IX86_BUILTIN_MOVDQUQI128_MASK,
+ IX86_BUILTIN_MINPS128_MASK,
+ IX86_BUILTIN_MAXPS128_MASK,
+ IX86_BUILTIN_MINPD128_MASK,
+ IX86_BUILTIN_MAXPD128_MASK,
+ IX86_BUILTIN_MAXPD256_MASK,
+ IX86_BUILTIN_MAXPS256_MASK,
+ IX86_BUILTIN_MINPD256_MASK,
+ IX86_BUILTIN_MINPS256_MASK,
+ IX86_BUILTIN_MULPS128_MASK,
+ IX86_BUILTIN_DIVPS128_MASK,
+ IX86_BUILTIN_MULPD128_MASK,
+ IX86_BUILTIN_DIVPD128_MASK,
+ IX86_BUILTIN_DIVPD256_MASK,
+ IX86_BUILTIN_DIVPS256_MASK,
+ IX86_BUILTIN_MULPD256_MASK,
+ IX86_BUILTIN_MULPS256_MASK,
+ IX86_BUILTIN_ADDPD128_MASK,
+ IX86_BUILTIN_ADDPD256_MASK,
+ IX86_BUILTIN_ADDPS128_MASK,
+ IX86_BUILTIN_ADDPS256_MASK,
+ IX86_BUILTIN_SUBPD128_MASK,
+ IX86_BUILTIN_SUBPD256_MASK,
+ IX86_BUILTIN_SUBPS128_MASK,
+ IX86_BUILTIN_SUBPS256_MASK,
+ IX86_BUILTIN_XORPD256_MASK,
+ IX86_BUILTIN_XORPD128_MASK,
+ IX86_BUILTIN_XORPS256_MASK,
+ IX86_BUILTIN_XORPS128_MASK,
+ IX86_BUILTIN_ORPD256_MASK,
+ IX86_BUILTIN_ORPD128_MASK,
+ IX86_BUILTIN_ORPS256_MASK,
+ IX86_BUILTIN_ORPS128_MASK,
+ IX86_BUILTIN_BROADCASTF32x2_256,
+ IX86_BUILTIN_BROADCASTI32x2_256,
+ IX86_BUILTIN_BROADCASTI32x2_128,
+ IX86_BUILTIN_BROADCASTF64X2_256,
+ IX86_BUILTIN_BROADCASTI64X2_256,
+ IX86_BUILTIN_BROADCASTF32X4_256,
+ IX86_BUILTIN_BROADCASTI32X4_256,
+ IX86_BUILTIN_EXTRACTF32X4_256,
+ IX86_BUILTIN_EXTRACTI32X4_256,
+ IX86_BUILTIN_DBPSADBW256,
+ IX86_BUILTIN_DBPSADBW128,
+ IX86_BUILTIN_CVTTPD2QQ256,
+ IX86_BUILTIN_CVTTPD2QQ128,
+ IX86_BUILTIN_CVTTPD2UQQ256,
+ IX86_BUILTIN_CVTTPD2UQQ128,
+ IX86_BUILTIN_CVTPD2QQ256,
+ IX86_BUILTIN_CVTPD2QQ128,
+ IX86_BUILTIN_CVTPD2UQQ256,
+ IX86_BUILTIN_CVTPD2UQQ128,
+ IX86_BUILTIN_CVTPD2UDQ256_MASK,
+ IX86_BUILTIN_CVTPD2UDQ128_MASK,
+ IX86_BUILTIN_CVTTPS2QQ256,
+ IX86_BUILTIN_CVTTPS2QQ128,
+ IX86_BUILTIN_CVTTPS2UQQ256,
+ IX86_BUILTIN_CVTTPS2UQQ128,
+ IX86_BUILTIN_CVTTPS2DQ256_MASK,
+ IX86_BUILTIN_CVTTPS2DQ128_MASK,
+ IX86_BUILTIN_CVTTPS2UDQ256,
+ IX86_BUILTIN_CVTTPS2UDQ128,
+ IX86_BUILTIN_CVTTPD2DQ256_MASK,
+ IX86_BUILTIN_CVTTPD2DQ128_MASK,
+ IX86_BUILTIN_CVTTPD2UDQ256_MASK,
+ IX86_BUILTIN_CVTTPD2UDQ128_MASK,
+ IX86_BUILTIN_CVTPD2DQ256_MASK,
+ IX86_BUILTIN_CVTPD2DQ128_MASK,
+ IX86_BUILTIN_CVTDQ2PD256_MASK,
+ IX86_BUILTIN_CVTDQ2PD128_MASK,
+ IX86_BUILTIN_CVTUDQ2PD256_MASK,
+ IX86_BUILTIN_CVTUDQ2PD128_MASK,
+ IX86_BUILTIN_CVTDQ2PS256_MASK,
+ IX86_BUILTIN_CVTDQ2PS128_MASK,
+ IX86_BUILTIN_CVTUDQ2PS256_MASK,
+ IX86_BUILTIN_CVTUDQ2PS128_MASK,
+ IX86_BUILTIN_CVTPS2PD256_MASK,
+ IX86_BUILTIN_CVTPS2PD128_MASK,
+ IX86_BUILTIN_PBROADCASTB256_MASK,
+ IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTB128_MASK,
+ IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTW256_MASK,
+ IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTW128_MASK,
+ IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTD256_MASK,
+ IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTD128_MASK,
+ IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTQ256_MASK,
+ IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
+ IX86_BUILTIN_PBROADCASTQ128_MASK,
+ IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
+ IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
+ IX86_BUILTIN_BROADCASTSS256,
+ IX86_BUILTIN_BROADCASTSS128,
+ IX86_BUILTIN_BROADCASTSD256,
+ IX86_BUILTIN_EXTRACTF64X2_256,
+ IX86_BUILTIN_EXTRACTI64X2_256,
+ IX86_BUILTIN_INSERTF32X4_256,
+ IX86_BUILTIN_INSERTI32X4_256,
+ IX86_BUILTIN_PMOVSXBW256_MASK,
+ IX86_BUILTIN_PMOVSXBW128_MASK,
+ IX86_BUILTIN_PMOVSXBD256_MASK,
+ IX86_BUILTIN_PMOVSXBD128_MASK,
+ IX86_BUILTIN_PMOVSXBQ256_MASK,
+ IX86_BUILTIN_PMOVSXBQ128_MASK,
+ IX86_BUILTIN_PMOVSXWD256_MASK,
+ IX86_BUILTIN_PMOVSXWD128_MASK,
+ IX86_BUILTIN_PMOVSXWQ256_MASK,
+ IX86_BUILTIN_PMOVSXWQ128_MASK,
+ IX86_BUILTIN_PMOVSXDQ256_MASK,
+ IX86_BUILTIN_PMOVSXDQ128_MASK,
+ IX86_BUILTIN_PMOVZXBW256_MASK,
+ IX86_BUILTIN_PMOVZXBW128_MASK,
+ IX86_BUILTIN_PMOVZXBD256_MASK,
+ IX86_BUILTIN_PMOVZXBD128_MASK,
+ IX86_BUILTIN_PMOVZXBQ256_MASK,
+ IX86_BUILTIN_PMOVZXBQ128_MASK,
+ IX86_BUILTIN_PMOVZXWD256_MASK,
+ IX86_BUILTIN_PMOVZXWD128_MASK,
+ IX86_BUILTIN_PMOVZXWQ256_MASK,
+ IX86_BUILTIN_PMOVZXWQ128_MASK,
+ IX86_BUILTIN_PMOVZXDQ256_MASK,
+ IX86_BUILTIN_PMOVZXDQ128_MASK,
+ IX86_BUILTIN_REDUCEPD256_MASK,
+ IX86_BUILTIN_REDUCEPD128_MASK,
+ IX86_BUILTIN_REDUCEPS256_MASK,
+ IX86_BUILTIN_REDUCEPS128_MASK,
+ IX86_BUILTIN_REDUCESD_MASK,
+ IX86_BUILTIN_REDUCESS_MASK,
+ IX86_BUILTIN_VPERMVARHI256_MASK,
+ IX86_BUILTIN_VPERMVARHI128_MASK,
+ IX86_BUILTIN_VPERMT2VARHI256,
+ IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
+ IX86_BUILTIN_VPERMT2VARHI128,
+ IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
+ IX86_BUILTIN_VPERMI2VARHI256,
+ IX86_BUILTIN_VPERMI2VARHI128,
+ IX86_BUILTIN_RCP14PD256,
+ IX86_BUILTIN_RCP14PD128,
+ IX86_BUILTIN_RCP14PS256,
+ IX86_BUILTIN_RCP14PS128,
+ IX86_BUILTIN_RSQRT14PD256_MASK,
+ IX86_BUILTIN_RSQRT14PD128_MASK,
+ IX86_BUILTIN_RSQRT14PS256_MASK,
+ IX86_BUILTIN_RSQRT14PS128_MASK,
+ IX86_BUILTIN_SQRTPD256_MASK,
+ IX86_BUILTIN_SQRTPD128_MASK,
+ IX86_BUILTIN_SQRTPS256_MASK,
+ IX86_BUILTIN_SQRTPS128_MASK,
+ IX86_BUILTIN_PADDB128_MASK,
+ IX86_BUILTIN_PADDW128_MASK,
+ IX86_BUILTIN_PADDD128_MASK,
+ IX86_BUILTIN_PADDQ128_MASK,
+ IX86_BUILTIN_PSUBB128_MASK,
+ IX86_BUILTIN_PSUBW128_MASK,
+ IX86_BUILTIN_PSUBD128_MASK,
+ IX86_BUILTIN_PSUBQ128_MASK,
+ IX86_BUILTIN_PADDSB128_MASK,
+ IX86_BUILTIN_PADDSW128_MASK,
+ IX86_BUILTIN_PSUBSB128_MASK,
+ IX86_BUILTIN_PSUBSW128_MASK,
+ IX86_BUILTIN_PADDUSB128_MASK,
+ IX86_BUILTIN_PADDUSW128_MASK,
+ IX86_BUILTIN_PSUBUSB128_MASK,
+ IX86_BUILTIN_PSUBUSW128_MASK,
+ IX86_BUILTIN_PADDB256_MASK,
+ IX86_BUILTIN_PADDW256_MASK,
+ IX86_BUILTIN_PADDD256_MASK,
+ IX86_BUILTIN_PADDQ256_MASK,
+ IX86_BUILTIN_PADDSB256_MASK,
+ IX86_BUILTIN_PADDSW256_MASK,
+ IX86_BUILTIN_PADDUSB256_MASK,
+ IX86_BUILTIN_PADDUSW256_MASK,
+ IX86_BUILTIN_PSUBB256_MASK,
+ IX86_BUILTIN_PSUBW256_MASK,
+ IX86_BUILTIN_PSUBD256_MASK,
+ IX86_BUILTIN_PSUBQ256_MASK,
+ IX86_BUILTIN_PSUBSB256_MASK,
+ IX86_BUILTIN_PSUBSW256_MASK,
+ IX86_BUILTIN_PSUBUSB256_MASK,
+ IX86_BUILTIN_PSUBUSW256_MASK,
+ IX86_BUILTIN_SHUF_F64x2_256,
+ IX86_BUILTIN_SHUF_I64x2_256,
+ IX86_BUILTIN_SHUF_I32x4_256,
+ IX86_BUILTIN_SHUF_F32x4_256,
+ IX86_BUILTIN_PMOVWB128,
+ IX86_BUILTIN_PMOVWB256,
+ IX86_BUILTIN_PMOVSWB128,
+ IX86_BUILTIN_PMOVSWB256,
+ IX86_BUILTIN_PMOVUSWB128,
+ IX86_BUILTIN_PMOVUSWB256,
+ IX86_BUILTIN_PMOVDB128,
+ IX86_BUILTIN_PMOVDB256,
+ IX86_BUILTIN_PMOVSDB128,
+ IX86_BUILTIN_PMOVSDB256,
+ IX86_BUILTIN_PMOVUSDB128,
+ IX86_BUILTIN_PMOVUSDB256,
+ IX86_BUILTIN_PMOVDW128,
+ IX86_BUILTIN_PMOVDW256,
+ IX86_BUILTIN_PMOVSDW128,
+ IX86_BUILTIN_PMOVSDW256,
+ IX86_BUILTIN_PMOVUSDW128,
+ IX86_BUILTIN_PMOVUSDW256,
+ IX86_BUILTIN_PMOVQB128,
+ IX86_BUILTIN_PMOVQB256,
+ IX86_BUILTIN_PMOVSQB128,
+ IX86_BUILTIN_PMOVSQB256,
+ IX86_BUILTIN_PMOVUSQB128,
+ IX86_BUILTIN_PMOVUSQB256,
+ IX86_BUILTIN_PMOVQW128,
+ IX86_BUILTIN_PMOVQW256,
+ IX86_BUILTIN_PMOVSQW128,
+ IX86_BUILTIN_PMOVSQW256,
+ IX86_BUILTIN_PMOVUSQW128,
+ IX86_BUILTIN_PMOVUSQW256,
+ IX86_BUILTIN_PMOVQD128,
+ IX86_BUILTIN_PMOVQD256,
+ IX86_BUILTIN_PMOVSQD128,
+ IX86_BUILTIN_PMOVSQD256,
+ IX86_BUILTIN_PMOVUSQD128,
+ IX86_BUILTIN_PMOVUSQD256,
+ IX86_BUILTIN_RANGEPD256,
+ IX86_BUILTIN_RANGEPD128,
+ IX86_BUILTIN_RANGEPS256,
+ IX86_BUILTIN_RANGEPS128,
+ IX86_BUILTIN_GETEXPPS256,
+ IX86_BUILTIN_GETEXPPD256,
+ IX86_BUILTIN_GETEXPPS128,
+ IX86_BUILTIN_GETEXPPD128,
+ IX86_BUILTIN_FIXUPIMMPD256,
+ IX86_BUILTIN_FIXUPIMMPD256_MASK,
+ IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
+ IX86_BUILTIN_FIXUPIMMPS256,
+ IX86_BUILTIN_FIXUPIMMPS256_MASK,
+ IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
+ IX86_BUILTIN_FIXUPIMMPD128,
+ IX86_BUILTIN_FIXUPIMMPD128_MASK,
+ IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
+ IX86_BUILTIN_FIXUPIMMPS128,
+ IX86_BUILTIN_FIXUPIMMPS128_MASK,
+ IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
+ IX86_BUILTIN_PABSQ256,
+ IX86_BUILTIN_PABSQ128,
+ IX86_BUILTIN_PABSD256_MASK,
+ IX86_BUILTIN_PABSD128_MASK,
+ IX86_BUILTIN_PMULHRSW256_MASK,
+ IX86_BUILTIN_PMULHRSW128_MASK,
+ IX86_BUILTIN_PMULHUW128_MASK,
+ IX86_BUILTIN_PMULHUW256_MASK,
+ IX86_BUILTIN_PMULHW256_MASK,
+ IX86_BUILTIN_PMULHW128_MASK,
+ IX86_BUILTIN_PMULLW256_MASK,
+ IX86_BUILTIN_PMULLW128_MASK,
+ IX86_BUILTIN_PMULLQ256,
+ IX86_BUILTIN_PMULLQ128,
+ IX86_BUILTIN_ANDPD256_MASK,
+ IX86_BUILTIN_ANDPD128_MASK,
+ IX86_BUILTIN_ANDPS256_MASK,
+ IX86_BUILTIN_ANDPS128_MASK,
+ IX86_BUILTIN_ANDNPD256_MASK,
+ IX86_BUILTIN_ANDNPD128_MASK,
+ IX86_BUILTIN_ANDNPS256_MASK,
+ IX86_BUILTIN_ANDNPS128_MASK,
+ IX86_BUILTIN_PSLLWI128_MASK,
+ IX86_BUILTIN_PSLLDI128_MASK,
+ IX86_BUILTIN_PSLLQI128_MASK,
+ IX86_BUILTIN_PSLLW128_MASK,
+ IX86_BUILTIN_PSLLD128_MASK,
+ IX86_BUILTIN_PSLLQ128_MASK,
+  IX86_BUILTIN_PSLLWI256_MASK,
+ IX86_BUILTIN_PSLLW256_MASK,
+ IX86_BUILTIN_PSLLDI256_MASK,
+ IX86_BUILTIN_PSLLD256_MASK,
+ IX86_BUILTIN_PSLLQI256_MASK,
+ IX86_BUILTIN_PSLLQ256_MASK,
+ IX86_BUILTIN_PSRADI128_MASK,
+ IX86_BUILTIN_PSRAD128_MASK,
+ IX86_BUILTIN_PSRADI256_MASK,
+ IX86_BUILTIN_PSRAD256_MASK,
+ IX86_BUILTIN_PSRAQI128_MASK,
+ IX86_BUILTIN_PSRAQ128_MASK,
+ IX86_BUILTIN_PSRAQI256_MASK,
+ IX86_BUILTIN_PSRAQ256_MASK,
+ IX86_BUILTIN_PANDD256,
+ IX86_BUILTIN_PANDD128,
+ IX86_BUILTIN_PSRLDI128_MASK,
+ IX86_BUILTIN_PSRLD128_MASK,
+ IX86_BUILTIN_PSRLDI256_MASK,
+ IX86_BUILTIN_PSRLD256_MASK,
+ IX86_BUILTIN_PSRLQI128_MASK,
+ IX86_BUILTIN_PSRLQ128_MASK,
+ IX86_BUILTIN_PSRLQI256_MASK,
+ IX86_BUILTIN_PSRLQ256_MASK,
+ IX86_BUILTIN_PANDQ256,
+ IX86_BUILTIN_PANDQ128,
+ IX86_BUILTIN_PANDND256,
+ IX86_BUILTIN_PANDND128,
+ IX86_BUILTIN_PANDNQ256,
+ IX86_BUILTIN_PANDNQ128,
+ IX86_BUILTIN_PORD256,
+ IX86_BUILTIN_PORD128,
+ IX86_BUILTIN_PORQ256,
+ IX86_BUILTIN_PORQ128,
+ IX86_BUILTIN_PXORD256,
+ IX86_BUILTIN_PXORD128,
+ IX86_BUILTIN_PXORQ256,
+ IX86_BUILTIN_PXORQ128,
+ IX86_BUILTIN_PACKSSWB256_MASK,
+ IX86_BUILTIN_PACKSSWB128_MASK,
+ IX86_BUILTIN_PACKUSWB256_MASK,
+ IX86_BUILTIN_PACKUSWB128_MASK,
+ IX86_BUILTIN_RNDSCALEPS256,
+ IX86_BUILTIN_RNDSCALEPD256,
+ IX86_BUILTIN_RNDSCALEPS128,
+ IX86_BUILTIN_RNDSCALEPD128,
+ IX86_BUILTIN_VTERNLOGQ256_MASK,
+ IX86_BUILTIN_VTERNLOGQ256_MASKZ,
+ IX86_BUILTIN_VTERNLOGD256_MASK,
+ IX86_BUILTIN_VTERNLOGD256_MASKZ,
+ IX86_BUILTIN_VTERNLOGQ128_MASK,
+ IX86_BUILTIN_VTERNLOGQ128_MASKZ,
+ IX86_BUILTIN_VTERNLOGD128_MASK,
+ IX86_BUILTIN_VTERNLOGD128_MASKZ,
+ IX86_BUILTIN_SCALEFPD256,
+ IX86_BUILTIN_SCALEFPS256,
+ IX86_BUILTIN_SCALEFPD128,
+ IX86_BUILTIN_SCALEFPS128,
+ IX86_BUILTIN_VFMADDPD256_MASK,
+ IX86_BUILTIN_VFMADDPD256_MASK3,
+ IX86_BUILTIN_VFMADDPD256_MASKZ,
+ IX86_BUILTIN_VFMADDPD128_MASK,
+ IX86_BUILTIN_VFMADDPD128_MASK3,
+ IX86_BUILTIN_VFMADDPD128_MASKZ,
+ IX86_BUILTIN_VFMADDPS256_MASK,
+ IX86_BUILTIN_VFMADDPS256_MASK3,
+ IX86_BUILTIN_VFMADDPS256_MASKZ,
+ IX86_BUILTIN_VFMADDPS128_MASK,
+ IX86_BUILTIN_VFMADDPS128_MASK3,
+ IX86_BUILTIN_VFMADDPS128_MASKZ,
+ IX86_BUILTIN_VFMSUBPD256_MASK3,
+ IX86_BUILTIN_VFMSUBPD128_MASK3,
+ IX86_BUILTIN_VFMSUBPS256_MASK3,
+ IX86_BUILTIN_VFMSUBPS128_MASK3,
+ IX86_BUILTIN_VFNMADDPD256_MASK,
+ IX86_BUILTIN_VFNMADDPD128_MASK,
+ IX86_BUILTIN_VFNMADDPS256_MASK,
+ IX86_BUILTIN_VFNMADDPS128_MASK,
+ IX86_BUILTIN_VFNMSUBPD256_MASK,
+ IX86_BUILTIN_VFNMSUBPD256_MASK3,
+ IX86_BUILTIN_VFNMSUBPD128_MASK,
+ IX86_BUILTIN_VFNMSUBPD128_MASK3,
+ IX86_BUILTIN_VFNMSUBPS256_MASK,
+ IX86_BUILTIN_VFNMSUBPS256_MASK3,
+ IX86_BUILTIN_VFNMSUBPS128_MASK,
+ IX86_BUILTIN_VFNMSUBPS128_MASK3,
+ IX86_BUILTIN_VFMADDSUBPD256_MASK,
+ IX86_BUILTIN_VFMADDSUBPD256_MASK3,
+ IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
+ IX86_BUILTIN_VFMADDSUBPD128_MASK,
+ IX86_BUILTIN_VFMADDSUBPD128_MASK3,
+ IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
+ IX86_BUILTIN_VFMADDSUBPS256_MASK,
+ IX86_BUILTIN_VFMADDSUBPS256_MASK3,
+ IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
+ IX86_BUILTIN_VFMADDSUBPS128_MASK,
+ IX86_BUILTIN_VFMADDSUBPS128_MASK3,
+ IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
+ IX86_BUILTIN_VFMSUBADDPD256_MASK3,
+ IX86_BUILTIN_VFMSUBADDPD128_MASK3,
+ IX86_BUILTIN_VFMSUBADDPS256_MASK3,
+ IX86_BUILTIN_VFMSUBADDPS128_MASK3,
+ IX86_BUILTIN_INSERTF64X2_256,
+ IX86_BUILTIN_INSERTI64X2_256,
+ IX86_BUILTIN_PSRAVV16HI,
+ IX86_BUILTIN_PSRAVV8HI,
+ IX86_BUILTIN_PMADDUBSW256_MASK,
+ IX86_BUILTIN_PMADDUBSW128_MASK,
+ IX86_BUILTIN_PMADDWD256_MASK,
+ IX86_BUILTIN_PMADDWD128_MASK,
+ IX86_BUILTIN_PSRLVV16HI,
+ IX86_BUILTIN_PSRLVV8HI,
+ IX86_BUILTIN_CVTPS2DQ256_MASK,
+ IX86_BUILTIN_CVTPS2DQ128_MASK,
+ IX86_BUILTIN_CVTPS2UDQ256,
+ IX86_BUILTIN_CVTPS2UDQ128,
+ IX86_BUILTIN_CVTPS2QQ256,
+ IX86_BUILTIN_CVTPS2QQ128,
+ IX86_BUILTIN_CVTPS2UQQ256,
+ IX86_BUILTIN_CVTPS2UQQ128,
+ IX86_BUILTIN_GETMANTPS256,
+ IX86_BUILTIN_GETMANTPS128,
+ IX86_BUILTIN_GETMANTPD256,
+ IX86_BUILTIN_GETMANTPD128,
+ IX86_BUILTIN_MOVDDUP256_MASK,
+ IX86_BUILTIN_MOVDDUP128_MASK,
+ IX86_BUILTIN_MOVSHDUP256_MASK,
+ IX86_BUILTIN_MOVSHDUP128_MASK,
+ IX86_BUILTIN_MOVSLDUP256_MASK,
+ IX86_BUILTIN_MOVSLDUP128_MASK,
+ IX86_BUILTIN_CVTQQ2PS256,
+ IX86_BUILTIN_CVTQQ2PS128,
+ IX86_BUILTIN_CVTUQQ2PS256,
+ IX86_BUILTIN_CVTUQQ2PS128,
+ IX86_BUILTIN_CVTQQ2PD256,
+ IX86_BUILTIN_CVTQQ2PD128,
+ IX86_BUILTIN_CVTUQQ2PD256,
+ IX86_BUILTIN_CVTUQQ2PD128,
+ IX86_BUILTIN_VPERMT2VARQ256,
+ IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
+ IX86_BUILTIN_VPERMT2VARD256,
+ IX86_BUILTIN_VPERMT2VARD256_MASKZ,
+ IX86_BUILTIN_VPERMI2VARQ256,
+ IX86_BUILTIN_VPERMI2VARD256,
+ IX86_BUILTIN_VPERMT2VARPD256,
+ IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
+ IX86_BUILTIN_VPERMT2VARPS256,
+ IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
+ IX86_BUILTIN_VPERMI2VARPD256,
+ IX86_BUILTIN_VPERMI2VARPS256,
+ IX86_BUILTIN_VPERMT2VARQ128,
+ IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
+ IX86_BUILTIN_VPERMT2VARD128,
+ IX86_BUILTIN_VPERMT2VARD128_MASKZ,
+ IX86_BUILTIN_VPERMI2VARQ128,
+ IX86_BUILTIN_VPERMI2VARD128,
+ IX86_BUILTIN_VPERMT2VARPD128,
+ IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
+ IX86_BUILTIN_VPERMT2VARPS128,
+ IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
+ IX86_BUILTIN_VPERMI2VARPD128,
+ IX86_BUILTIN_VPERMI2VARPS128,
+ IX86_BUILTIN_PSHUFB256_MASK,
+ IX86_BUILTIN_PSHUFB128_MASK,
+ IX86_BUILTIN_PSHUFHW256_MASK,
+ IX86_BUILTIN_PSHUFHW128_MASK,
+ IX86_BUILTIN_PSHUFLW256_MASK,
+ IX86_BUILTIN_PSHUFLW128_MASK,
+ IX86_BUILTIN_PSHUFD256_MASK,
+ IX86_BUILTIN_PSHUFD128_MASK,
+ IX86_BUILTIN_SHUFPD256_MASK,
+ IX86_BUILTIN_SHUFPD128_MASK,
+ IX86_BUILTIN_SHUFPS256_MASK,
+ IX86_BUILTIN_SHUFPS128_MASK,
+ IX86_BUILTIN_PROLVQ256,
+ IX86_BUILTIN_PROLVQ128,
+ IX86_BUILTIN_PROLQ256,
+ IX86_BUILTIN_PROLQ128,
+ IX86_BUILTIN_PRORVQ256,
+ IX86_BUILTIN_PRORVQ128,
+ IX86_BUILTIN_PRORQ256,
+ IX86_BUILTIN_PRORQ128,
+ IX86_BUILTIN_PSRAVQ128,
+ IX86_BUILTIN_PSRAVQ256,
+ IX86_BUILTIN_PSLLVV4DI_MASK,
+ IX86_BUILTIN_PSLLVV2DI_MASK,
+ IX86_BUILTIN_PSLLVV8SI_MASK,
+ IX86_BUILTIN_PSLLVV4SI_MASK,
+ IX86_BUILTIN_PSRAVV8SI_MASK,
+ IX86_BUILTIN_PSRAVV4SI_MASK,
+ IX86_BUILTIN_PSRLVV4DI_MASK,
+ IX86_BUILTIN_PSRLVV2DI_MASK,
+ IX86_BUILTIN_PSRLVV8SI_MASK,
+ IX86_BUILTIN_PSRLVV4SI_MASK,
+ IX86_BUILTIN_PSRAWI256_MASK,
+ IX86_BUILTIN_PSRAW256_MASK,
+ IX86_BUILTIN_PSRAWI128_MASK,
+ IX86_BUILTIN_PSRAW128_MASK,
+ IX86_BUILTIN_PSRLWI256_MASK,
+ IX86_BUILTIN_PSRLW256_MASK,
+ IX86_BUILTIN_PSRLWI128_MASK,
+ IX86_BUILTIN_PSRLW128_MASK,
+ IX86_BUILTIN_PRORVD256,
+ IX86_BUILTIN_PROLVD256,
+ IX86_BUILTIN_PRORD256,
+ IX86_BUILTIN_PROLD256,
+ IX86_BUILTIN_PRORVD128,
+ IX86_BUILTIN_PROLVD128,
+ IX86_BUILTIN_PRORD128,
+ IX86_BUILTIN_PROLD128,
+ IX86_BUILTIN_FPCLASSPD256,
+ IX86_BUILTIN_FPCLASSPD128,
+ IX86_BUILTIN_FPCLASSSD,
+ IX86_BUILTIN_FPCLASSPS256,
+ IX86_BUILTIN_FPCLASSPS128,
+ IX86_BUILTIN_FPCLASSSS,
+ IX86_BUILTIN_CVTB2MASK128,
+ IX86_BUILTIN_CVTB2MASK256,
+ IX86_BUILTIN_CVTW2MASK128,
+ IX86_BUILTIN_CVTW2MASK256,
+ IX86_BUILTIN_CVTD2MASK128,
+ IX86_BUILTIN_CVTD2MASK256,
+ IX86_BUILTIN_CVTQ2MASK128,
+ IX86_BUILTIN_CVTQ2MASK256,
+ IX86_BUILTIN_CVTMASK2B128,
+ IX86_BUILTIN_CVTMASK2B256,
+ IX86_BUILTIN_CVTMASK2W128,
+ IX86_BUILTIN_CVTMASK2W256,
+ IX86_BUILTIN_CVTMASK2D128,
+ IX86_BUILTIN_CVTMASK2D256,
+ IX86_BUILTIN_CVTMASK2Q128,
+ IX86_BUILTIN_CVTMASK2Q256,
+ IX86_BUILTIN_PCMPEQB128_MASK,
+ IX86_BUILTIN_PCMPEQB256_MASK,
+ IX86_BUILTIN_PCMPEQW128_MASK,
+ IX86_BUILTIN_PCMPEQW256_MASK,
+ IX86_BUILTIN_PCMPEQD128_MASK,
+ IX86_BUILTIN_PCMPEQD256_MASK,
+ IX86_BUILTIN_PCMPEQQ128_MASK,
+ IX86_BUILTIN_PCMPEQQ256_MASK,
+ IX86_BUILTIN_PCMPGTB128_MASK,
+ IX86_BUILTIN_PCMPGTB256_MASK,
+ IX86_BUILTIN_PCMPGTW128_MASK,
+ IX86_BUILTIN_PCMPGTW256_MASK,
+ IX86_BUILTIN_PCMPGTD128_MASK,
+ IX86_BUILTIN_PCMPGTD256_MASK,
+ IX86_BUILTIN_PCMPGTQ128_MASK,
+ IX86_BUILTIN_PCMPGTQ256_MASK,
+ IX86_BUILTIN_PTESTMB128,
+ IX86_BUILTIN_PTESTMB256,
+ IX86_BUILTIN_PTESTMW128,
+ IX86_BUILTIN_PTESTMW256,
+ IX86_BUILTIN_PTESTMD128,
+ IX86_BUILTIN_PTESTMD256,
+ IX86_BUILTIN_PTESTMQ128,
+ IX86_BUILTIN_PTESTMQ256,
+ IX86_BUILTIN_PTESTNMB128,
+ IX86_BUILTIN_PTESTNMB256,
+ IX86_BUILTIN_PTESTNMW128,
+ IX86_BUILTIN_PTESTNMW256,
+ IX86_BUILTIN_PTESTNMD128,
+ IX86_BUILTIN_PTESTNMD256,
+ IX86_BUILTIN_PTESTNMQ128,
+ IX86_BUILTIN_PTESTNMQ256,
+ IX86_BUILTIN_PBROADCASTMB128,
+ IX86_BUILTIN_PBROADCASTMB256,
+ IX86_BUILTIN_PBROADCASTMW128,
+ IX86_BUILTIN_PBROADCASTMW256,
+ IX86_BUILTIN_COMPRESSPD256,
+ IX86_BUILTIN_COMPRESSPD128,
+ IX86_BUILTIN_COMPRESSPS256,
+ IX86_BUILTIN_COMPRESSPS128,
+ IX86_BUILTIN_PCOMPRESSQ256,
+ IX86_BUILTIN_PCOMPRESSQ128,
+ IX86_BUILTIN_PCOMPRESSD256,
+ IX86_BUILTIN_PCOMPRESSD128,
+ IX86_BUILTIN_EXPANDPD256,
+ IX86_BUILTIN_EXPANDPD128,
+ IX86_BUILTIN_EXPANDPS256,
+ IX86_BUILTIN_EXPANDPS128,
+ IX86_BUILTIN_PEXPANDQ256,
+ IX86_BUILTIN_PEXPANDQ128,
+ IX86_BUILTIN_PEXPANDD256,
+ IX86_BUILTIN_PEXPANDD128,
+ IX86_BUILTIN_EXPANDPD256Z,
+ IX86_BUILTIN_EXPANDPD128Z,
+ IX86_BUILTIN_EXPANDPS256Z,
+ IX86_BUILTIN_EXPANDPS128Z,
+ IX86_BUILTIN_PEXPANDQ256Z,
+ IX86_BUILTIN_PEXPANDQ128Z,
+ IX86_BUILTIN_PEXPANDD256Z,
+ IX86_BUILTIN_PEXPANDD128Z,
+ IX86_BUILTIN_PMAXSD256_MASK,
+ IX86_BUILTIN_PMINSD256_MASK,
+ IX86_BUILTIN_PMAXUD256_MASK,
+ IX86_BUILTIN_PMINUD256_MASK,
+ IX86_BUILTIN_PMAXSD128_MASK,
+ IX86_BUILTIN_PMINSD128_MASK,
+ IX86_BUILTIN_PMAXUD128_MASK,
+ IX86_BUILTIN_PMINUD128_MASK,
+ IX86_BUILTIN_PMAXSQ256_MASK,
+ IX86_BUILTIN_PMINSQ256_MASK,
+ IX86_BUILTIN_PMAXUQ256_MASK,
+ IX86_BUILTIN_PMINUQ256_MASK,
+ IX86_BUILTIN_PMAXSQ128_MASK,
+ IX86_BUILTIN_PMINSQ128_MASK,
+ IX86_BUILTIN_PMAXUQ128_MASK,
+ IX86_BUILTIN_PMINUQ128_MASK,
+ IX86_BUILTIN_PMINSB256_MASK,
+ IX86_BUILTIN_PMINUB256_MASK,
+ IX86_BUILTIN_PMAXSB256_MASK,
+ IX86_BUILTIN_PMAXUB256_MASK,
+ IX86_BUILTIN_PMINSB128_MASK,
+ IX86_BUILTIN_PMINUB128_MASK,
+ IX86_BUILTIN_PMAXSB128_MASK,
+ IX86_BUILTIN_PMAXUB128_MASK,
+ IX86_BUILTIN_PMINSW256_MASK,
+ IX86_BUILTIN_PMINUW256_MASK,
+ IX86_BUILTIN_PMAXSW256_MASK,
+ IX86_BUILTIN_PMAXUW256_MASK,
+ IX86_BUILTIN_PMINSW128_MASK,
+ IX86_BUILTIN_PMINUW128_MASK,
+ IX86_BUILTIN_PMAXSW128_MASK,
+ IX86_BUILTIN_PMAXUW128_MASK,
+ IX86_BUILTIN_VPCONFLICTQ256,
+ IX86_BUILTIN_VPCONFLICTD256,
+ IX86_BUILTIN_VPCLZCNTQ256,
+ IX86_BUILTIN_VPCLZCNTD256,
+ IX86_BUILTIN_UNPCKHPD256_MASK,
+ IX86_BUILTIN_UNPCKHPD128_MASK,
+ IX86_BUILTIN_UNPCKHPS256_MASK,
+ IX86_BUILTIN_UNPCKHPS128_MASK,
+ IX86_BUILTIN_UNPCKLPD256_MASK,
+ IX86_BUILTIN_UNPCKLPD128_MASK,
+ IX86_BUILTIN_UNPCKLPS256_MASK,
+ IX86_BUILTIN_VPCONFLICTQ128,
+ IX86_BUILTIN_VPCONFLICTD128,
+ IX86_BUILTIN_VPCLZCNTQ128,
+ IX86_BUILTIN_VPCLZCNTD128,
+ IX86_BUILTIN_UNPCKLPS128_MASK,
+ IX86_BUILTIN_ALIGND256,
+ IX86_BUILTIN_ALIGNQ256,
+ IX86_BUILTIN_ALIGND128,
+ IX86_BUILTIN_ALIGNQ128,
+ IX86_BUILTIN_CVTPS2PH256_MASK,
+ IX86_BUILTIN_CVTPS2PH_MASK,
+ IX86_BUILTIN_CVTPH2PS_MASK,
+ IX86_BUILTIN_CVTPH2PS256_MASK,
+ IX86_BUILTIN_PUNPCKHDQ128_MASK,
+ IX86_BUILTIN_PUNPCKHDQ256_MASK,
+ IX86_BUILTIN_PUNPCKHQDQ128_MASK,
+ IX86_BUILTIN_PUNPCKHQDQ256_MASK,
+ IX86_BUILTIN_PUNPCKLDQ128_MASK,
+ IX86_BUILTIN_PUNPCKLDQ256_MASK,
+ IX86_BUILTIN_PUNPCKLQDQ128_MASK,
+ IX86_BUILTIN_PUNPCKLQDQ256_MASK,
+ IX86_BUILTIN_PUNPCKHBW128_MASK,
+ IX86_BUILTIN_PUNPCKHBW256_MASK,
+ IX86_BUILTIN_PUNPCKHWD128_MASK,
+ IX86_BUILTIN_PUNPCKHWD256_MASK,
+ IX86_BUILTIN_PUNPCKLBW128_MASK,
+ IX86_BUILTIN_PUNPCKLBW256_MASK,
+ IX86_BUILTIN_PUNPCKLWD128_MASK,
+ IX86_BUILTIN_PUNPCKLWD256_MASK,
+ IX86_BUILTIN_PSLLVV16HI,
+ IX86_BUILTIN_PSLLVV8HI,
+ IX86_BUILTIN_PACKSSDW256_MASK,
+ IX86_BUILTIN_PACKSSDW128_MASK,
+ IX86_BUILTIN_PACKUSDW256_MASK,
+ IX86_BUILTIN_PACKUSDW128_MASK,
+ IX86_BUILTIN_PAVGB256_MASK,
+ IX86_BUILTIN_PAVGW256_MASK,
+ IX86_BUILTIN_PAVGB128_MASK,
+ IX86_BUILTIN_PAVGW128_MASK,
+ IX86_BUILTIN_VPERMVARSF256_MASK,
+ IX86_BUILTIN_VPERMVARDF256_MASK,
+ IX86_BUILTIN_VPERMDF256_MASK,
+ IX86_BUILTIN_PABSB256_MASK,
+ IX86_BUILTIN_PABSB128_MASK,
+ IX86_BUILTIN_PABSW256_MASK,
+ IX86_BUILTIN_PABSW128_MASK,
+ IX86_BUILTIN_VPERMILVARPD_MASK,
+ IX86_BUILTIN_VPERMILVARPS_MASK,
+ IX86_BUILTIN_VPERMILVARPD256_MASK,
+ IX86_BUILTIN_VPERMILVARPS256_MASK,
+ IX86_BUILTIN_VPERMILPD_MASK,
+ IX86_BUILTIN_VPERMILPS_MASK,
+ IX86_BUILTIN_VPERMILPD256_MASK,
+ IX86_BUILTIN_VPERMILPS256_MASK,
+ IX86_BUILTIN_BLENDMQ256,
+ IX86_BUILTIN_BLENDMD256,
+ IX86_BUILTIN_BLENDMPD256,
+ IX86_BUILTIN_BLENDMPS256,
+ IX86_BUILTIN_BLENDMQ128,
+ IX86_BUILTIN_BLENDMD128,
+ IX86_BUILTIN_BLENDMPD128,
+ IX86_BUILTIN_BLENDMPS128,
+ IX86_BUILTIN_BLENDMW256,
+ IX86_BUILTIN_BLENDMB256,
+ IX86_BUILTIN_BLENDMW128,
+ IX86_BUILTIN_BLENDMB128,
+ IX86_BUILTIN_PMULLD256_MASK,
+ IX86_BUILTIN_PMULLD128_MASK,
+ IX86_BUILTIN_PMULUDQ256_MASK,
+ IX86_BUILTIN_PMULDQ256_MASK,
+ IX86_BUILTIN_PMULDQ128_MASK,
+ IX86_BUILTIN_PMULUDQ128_MASK,
+ IX86_BUILTIN_CVTPD2PS256_MASK,
+ IX86_BUILTIN_CVTPD2PS_MASK,
+ IX86_BUILTIN_VPERMVARSI256_MASK,
+ IX86_BUILTIN_VPERMVARDI256_MASK,
+ IX86_BUILTIN_VPERMDI256_MASK,
+ IX86_BUILTIN_CMPQ256,
+ IX86_BUILTIN_CMPD256,
+ IX86_BUILTIN_UCMPQ256,
+ IX86_BUILTIN_UCMPD256,
+ IX86_BUILTIN_CMPB256,
+ IX86_BUILTIN_CMPW256,
+ IX86_BUILTIN_UCMPB256,
+ IX86_BUILTIN_UCMPW256,
+ IX86_BUILTIN_CMPPD256_MASK,
+ IX86_BUILTIN_CMPPS256_MASK,
+ IX86_BUILTIN_CMPQ128,
+ IX86_BUILTIN_CMPD128,
+ IX86_BUILTIN_UCMPQ128,
+ IX86_BUILTIN_UCMPD128,
+ IX86_BUILTIN_CMPB128,
+ IX86_BUILTIN_CMPW128,
+ IX86_BUILTIN_UCMPB128,
+ IX86_BUILTIN_UCMPW128,
+ IX86_BUILTIN_CMPPD128_MASK,
+ IX86_BUILTIN_CMPPS128_MASK,
+
+ IX86_BUILTIN_GATHER3SIV8SF,
+ IX86_BUILTIN_GATHER3SIV4SF,
+ IX86_BUILTIN_GATHER3SIV4DF,
+ IX86_BUILTIN_GATHER3SIV2DF,
+ IX86_BUILTIN_GATHER3DIV8SF,
+ IX86_BUILTIN_GATHER3DIV4SF,
+ IX86_BUILTIN_GATHER3DIV4DF,
+ IX86_BUILTIN_GATHER3DIV2DF,
+ IX86_BUILTIN_GATHER3SIV8SI,
+ IX86_BUILTIN_GATHER3SIV4SI,
+ IX86_BUILTIN_GATHER3SIV4DI,
+ IX86_BUILTIN_GATHER3SIV2DI,
+ IX86_BUILTIN_GATHER3DIV8SI,
+ IX86_BUILTIN_GATHER3DIV4SI,
+ IX86_BUILTIN_GATHER3DIV4DI,
+ IX86_BUILTIN_GATHER3DIV2DI,
+ IX86_BUILTIN_SCATTERSIV8SF,
+ IX86_BUILTIN_SCATTERSIV4SF,
+ IX86_BUILTIN_SCATTERSIV4DF,
+ IX86_BUILTIN_SCATTERSIV2DF,
+ IX86_BUILTIN_SCATTERDIV8SF,
+ IX86_BUILTIN_SCATTERDIV4SF,
+ IX86_BUILTIN_SCATTERDIV4DF,
+ IX86_BUILTIN_SCATTERDIV2DF,
+ IX86_BUILTIN_SCATTERSIV8SI,
+ IX86_BUILTIN_SCATTERSIV4SI,
+ IX86_BUILTIN_SCATTERSIV4DI,
+ IX86_BUILTIN_SCATTERSIV2DI,
+ IX86_BUILTIN_SCATTERDIV8SI,
+ IX86_BUILTIN_SCATTERDIV4SI,
+ IX86_BUILTIN_SCATTERDIV4DI,
+ IX86_BUILTIN_SCATTERDIV2DI,
+
+ /* AVX512DQ. */
+ IX86_BUILTIN_RANGESD128,
+ IX86_BUILTIN_RANGESS128,
+ IX86_BUILTIN_KUNPCKWD,
+ IX86_BUILTIN_KUNPCKDQ,
+ IX86_BUILTIN_BROADCASTF32x2_512,
+ IX86_BUILTIN_BROADCASTI32x2_512,
+ IX86_BUILTIN_BROADCASTF64X2_512,
+ IX86_BUILTIN_BROADCASTI64X2_512,
+ IX86_BUILTIN_BROADCASTF32X8_512,
+ IX86_BUILTIN_BROADCASTI32X8_512,
+ IX86_BUILTIN_EXTRACTF64X2_512,
+ IX86_BUILTIN_EXTRACTF32X8,
+ IX86_BUILTIN_EXTRACTI64X2_512,
+ IX86_BUILTIN_EXTRACTI32X8,
+ IX86_BUILTIN_REDUCEPD512_MASK,
+ IX86_BUILTIN_REDUCEPS512_MASK,
+ IX86_BUILTIN_PMULLQ512,
+ IX86_BUILTIN_XORPD512,
+ IX86_BUILTIN_XORPS512,
+ IX86_BUILTIN_ORPD512,
+ IX86_BUILTIN_ORPS512,
+ IX86_BUILTIN_ANDPD512,
+ IX86_BUILTIN_ANDPS512,
+ IX86_BUILTIN_ANDNPD512,
+ IX86_BUILTIN_ANDNPS512,
+ IX86_BUILTIN_INSERTF32X8,
+ IX86_BUILTIN_INSERTI32X8,
+ IX86_BUILTIN_INSERTF64X2_512,
+ IX86_BUILTIN_INSERTI64X2_512,
+ IX86_BUILTIN_FPCLASSPD512,
+ IX86_BUILTIN_FPCLASSPS512,
+ IX86_BUILTIN_CVTD2MASK512,
+ IX86_BUILTIN_CVTQ2MASK512,
+ IX86_BUILTIN_CVTMASK2D512,
+ IX86_BUILTIN_CVTMASK2Q512,
+ IX86_BUILTIN_CVTPD2QQ512,
+ IX86_BUILTIN_CVTPS2QQ512,
+ IX86_BUILTIN_CVTPD2UQQ512,
+ IX86_BUILTIN_CVTPS2UQQ512,
+ IX86_BUILTIN_CVTQQ2PS512,
+ IX86_BUILTIN_CVTUQQ2PS512,
+ IX86_BUILTIN_CVTQQ2PD512,
+ IX86_BUILTIN_CVTUQQ2PD512,
+ IX86_BUILTIN_CVTTPS2QQ512,
+ IX86_BUILTIN_CVTTPS2UQQ512,
+ IX86_BUILTIN_CVTTPD2QQ512,
+ IX86_BUILTIN_CVTTPD2UQQ512,
+ IX86_BUILTIN_RANGEPS512,
+ IX86_BUILTIN_RANGEPD512,
+
+ /* AVX512BW. */
+ IX86_BUILTIN_PACKUSDW512,
+ IX86_BUILTIN_PACKSSDW512,
+ IX86_BUILTIN_LOADDQUHI512_MASK,
+ IX86_BUILTIN_LOADDQUQI512_MASK,
+ IX86_BUILTIN_PSLLDQ512,
+ IX86_BUILTIN_PSRLDQ512,
+ IX86_BUILTIN_STOREDQUHI512_MASK,
+ IX86_BUILTIN_STOREDQUQI512_MASK,
+ IX86_BUILTIN_PALIGNR512,
+ IX86_BUILTIN_PALIGNR512_MASK,
+ IX86_BUILTIN_MOVDQUHI512_MASK,
+ IX86_BUILTIN_MOVDQUQI512_MASK,
+ IX86_BUILTIN_PSADBW512,
+ IX86_BUILTIN_DBPSADBW512,
+ IX86_BUILTIN_PBROADCASTB512,
+ IX86_BUILTIN_PBROADCASTB512_GPR,
+ IX86_BUILTIN_PBROADCASTW512,
+ IX86_BUILTIN_PBROADCASTW512_GPR,
+ IX86_BUILTIN_PMOVSXBW512_MASK,
+ IX86_BUILTIN_PMOVZXBW512_MASK,
+ IX86_BUILTIN_VPERMVARHI512_MASK,
+ IX86_BUILTIN_VPERMT2VARHI512,
+ IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
+ IX86_BUILTIN_VPERMI2VARHI512,
+ IX86_BUILTIN_PAVGB512,
+ IX86_BUILTIN_PAVGW512,
+ IX86_BUILTIN_PADDB512,
+ IX86_BUILTIN_PSUBB512,
+ IX86_BUILTIN_PSUBSB512,
+ IX86_BUILTIN_PADDSB512,
+ IX86_BUILTIN_PSUBUSB512,
+ IX86_BUILTIN_PADDUSB512,
+ IX86_BUILTIN_PSUBW512,
+ IX86_BUILTIN_PADDW512,
+ IX86_BUILTIN_PSUBSW512,
+ IX86_BUILTIN_PADDSW512,
+ IX86_BUILTIN_PSUBUSW512,
+ IX86_BUILTIN_PADDUSW512,
+ IX86_BUILTIN_PMAXUW512,
+ IX86_BUILTIN_PMAXSW512,
+ IX86_BUILTIN_PMINUW512,
+ IX86_BUILTIN_PMINSW512,
+ IX86_BUILTIN_PMAXUB512,
+ IX86_BUILTIN_PMAXSB512,
+ IX86_BUILTIN_PMINUB512,
+ IX86_BUILTIN_PMINSB512,
+ IX86_BUILTIN_PMOVWB512,
+ IX86_BUILTIN_PMOVSWB512,
+ IX86_BUILTIN_PMOVUSWB512,
+ IX86_BUILTIN_PMULHRSW512_MASK,
+ IX86_BUILTIN_PMULHUW512_MASK,
+ IX86_BUILTIN_PMULHW512_MASK,
+ IX86_BUILTIN_PMULLW512_MASK,
+ IX86_BUILTIN_PSLLWI512_MASK,
+ IX86_BUILTIN_PSLLW512_MASK,
+ IX86_BUILTIN_PACKSSWB512,
+ IX86_BUILTIN_PACKUSWB512,
+ IX86_BUILTIN_PSRAVV32HI,
+ IX86_BUILTIN_PMADDUBSW512_MASK,
+ IX86_BUILTIN_PMADDWD512_MASK,
+ IX86_BUILTIN_PSRLVV32HI,
+ IX86_BUILTIN_PUNPCKHBW512,
+ IX86_BUILTIN_PUNPCKHWD512,
+ IX86_BUILTIN_PUNPCKLBW512,
+ IX86_BUILTIN_PUNPCKLWD512,
+ IX86_BUILTIN_PSHUFB512,
+ IX86_BUILTIN_PSHUFHW512,
+ IX86_BUILTIN_PSHUFLW512,
+ IX86_BUILTIN_PSRAWI512,
+ IX86_BUILTIN_PSRAW512,
+ IX86_BUILTIN_PSRLWI512,
+ IX86_BUILTIN_PSRLW512,
+ IX86_BUILTIN_CVTB2MASK512,
+ IX86_BUILTIN_CVTW2MASK512,
+ IX86_BUILTIN_CVTMASK2B512,
+ IX86_BUILTIN_CVTMASK2W512,
+ IX86_BUILTIN_PCMPEQB512_MASK,
+ IX86_BUILTIN_PCMPEQW512_MASK,
+ IX86_BUILTIN_PCMPGTB512_MASK,
+ IX86_BUILTIN_PCMPGTW512_MASK,
+ IX86_BUILTIN_PTESTMB512,
+ IX86_BUILTIN_PTESTMW512,
+ IX86_BUILTIN_PTESTNMB512,
+ IX86_BUILTIN_PTESTNMW512,
+ IX86_BUILTIN_PSLLVV32HI,
+ IX86_BUILTIN_PABSB512,
+ IX86_BUILTIN_PABSW512,
+ IX86_BUILTIN_BLENDMW512,
+ IX86_BUILTIN_BLENDMB512,
+ IX86_BUILTIN_CMPB512,
+ IX86_BUILTIN_CMPW512,
+ IX86_BUILTIN_UCMPB512,
+ IX86_BUILTIN_UCMPW512,
+
/* Alternate 4 and 8 element gather/scatter for the vectorizer
where all operands are 32-byte or 64-byte wide respectively. */
IX86_BUILTIN_GATHERALTSIV4DF,
@@ -28490,6 +29632,10 @@ enum ix86_builtins
IX86_BUILTIN_GATHERALTDIV8SI,
IX86_BUILTIN_GATHER3ALTDIV16SF,
IX86_BUILTIN_GATHER3ALTDIV16SI,
+ IX86_BUILTIN_GATHER3ALTSIV4DF,
+ IX86_BUILTIN_GATHER3ALTDIV8SF,
+ IX86_BUILTIN_GATHER3ALTSIV4DI,
+ IX86_BUILTIN_GATHER3ALTDIV8SI,
IX86_BUILTIN_GATHER3ALTSIV8DF,
IX86_BUILTIN_GATHER3ALTSIV8DI,
IX86_BUILTIN_GATHER3DIV16SF,
@@ -29127,7 +30273,7 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512dq_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
@@ -29173,6 +30319,108 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
{ OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
+
+  /* AVX512BW.  */
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
+
+  /* AVX512VL.  */
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_store_mask, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_store_mask, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_store_mask, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_store_mask, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_store_mask, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_store_mask, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_store_mask, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_store_mask, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_store_mask, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_store_mask, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_store_mask, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_store_mask, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_store_mask, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_store_mask, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_store_mask, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_store_mask, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_store_mask, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_store_mask, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_store_mask, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_store_mask, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_store_mask, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_store_mask, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_store_mask, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_store_mask, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_store_mask, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_store_mask, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_store_mask, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_store_mask, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_store_mask, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_store_mask, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
};
/* Builtins with variable number of arguments. */
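The VOID_FTYPE_P* entries above are the memory-destination forms of the down-converting moves: the builtin writes a truncated (or ss_/us_ saturated) result through a pointer under a write mask. A minimal sketch of the wrapper shape such an entry implies, assuming the conventional avx512vlintrin.h spelling (the intrinsic name below is an assumption, not part of this patch):

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi64_storeu_epi32 (void *__P, __mmask8 __M, __m256i __A)
{
  /* VOID_FTYPE_PV4SI_V4DI_QI: pointer destination, vector source,
     8-bit write mask -- only the selected 32-bit lanes are stored.  */
  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A,
                                    (__mmask8) __M);
}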
@@ -29966,8 +31214,8 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
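The two-line change above retargets the 256-bit integer lane extract/insert builtins from the dedicated avx2_extracti128/avx2_inserti128 patterns to the shared avx_vextractf128v4di/avx_vinsertf128v4di patterns; the builtins' prototypes (V2DI_FTYPE_V4DI_INT, V4DI_FTYPE_V4DI_V2DI_INT) are unchanged, so callers are unaffected. A sketch of the user-level path to the retargeted builtin, assuming the usual immediate-operand wrapper style (guarded by __OPTIMIZE__ in the customary header fashion so the lane selector folds to a constant):

#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti128_si256 (__m256i __X, const int __M)
{
  /* __M selects the low (0) or high (1) 128-bit lane.  */
  return (__m128i) __builtin_ia32_extract128i256 ((__v4di) __X, __M);
}
#endif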
@@ -30023,7 +31271,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
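The one-line fix above only renames the insn pattern for the unsigned int-to-double conversion to the canonical 2-suffixed form, ufloatv8siv8df2_mask; the builtin keeps its V8DF_FTYPE_V8SI_V8DF_QI prototype. A minimal merge-masking sketch over it, assuming the conventional avx512fintrin.h wrapper shape:

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  /* V8DF_FTYPE_V8SI_V8DF_QI: source, merge source, write mask.
     Masked-off lanes keep their values from __W.  */
  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
                                                     (__v8df) __W,
                                                     (__mmask8) __U);
}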
@@ -30226,6 +31474,852 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
+
+ /* AVX512VL. */
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df, "__builtin_ia32_fixupimmpd256", IX86_BUILTIN_FIXUPIMMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf, "__builtin_ia32_fixupimmps256", IX86_BUILTIN_FIXUPIMMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df, "__builtin_ia32_fixupimmpd128", IX86_BUILTIN_FIXUPIMMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf, "__builtin_ia32_fixupimmps128", IX86_BUILTIN_FIXUPIMMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
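+ /* Floating-point bitwise-logic builtins.  */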
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
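+ /* Shift and integer bitwise-logic builtins.  */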
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask_1, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask_1, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
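+ /* Saturating pack builtins.  */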
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
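+ /* Round-scale (vrndscale) builtins.  */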
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
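+ /* Ternary-logic (vpternlog) builtins.  */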
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
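+ /* Floating-point scale (vscalef) builtins.  */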
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
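+ /* Fused multiply-add family builtins.  */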
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
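+ /* 128-bit insert and 16-bit multiply-add/variable-shift builtins.  */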
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
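+ /* Floating-point to integer conversion builtins.  */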
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
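+ /* Get-mantissa (vgetmant) builtins.  */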
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
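+ /* Duplicate (movddup/movshdup/movsldup) builtins.  */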
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
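+ /* Quadword integer to floating-point conversion builtins.  */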
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
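+ /* Two-source variable permute (vpermt2/vpermi2) builtins.  */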
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
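+ /* Shuffle builtins.  */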
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
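+ /* Rotate and shift builtins.  */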
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
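+ /* Floating-point class-test (vfpclass) builtins.  */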
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
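+ /* Vector to/from mask-register conversion builtins.  */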
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
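+ /* Integer comparison builtins.  */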
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
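+ /* Mask-test (vptestm/vptestnm) builtins.  */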
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
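+ /* Mask broadcast (vpbroadcastm) builtins.  */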
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
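+ /* Compress and expand builtins.  */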
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
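+ /* Integer min/max builtins.  */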
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
+ { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
+
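For reference, a minimal sketch (not part of the patch) of how the new masked-compare table entries above are reached from user code; the helper name lt_mask is illustrative, -mavx512vl is assumed, and 1 is the _MM_CMPINT_LT predicate:

    #include <immintrin.h>

    __mmask8
    lt_mask (__m256i a, __m256i b)
    {
      /* QI_FTYPE_V4DI_V4DI_INT_QI: immediate predicate, all-ones writemask.  */
      return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) a, (__v4di) b,
                                                     1, (__mmask8) -1);
    }
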
+ /* AVX512DQ. */
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
+
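A similar sketch (not part of the patch) for the new DQ vector-to-mask conversion entries; sign_bits is an illustrative name and -mavx512dq is assumed:

    #include <immintrin.h>

    __mmask16
    sign_bits (__m512i v)
    {
      /* HI_FTYPE_V16SI: one mask bit per dword, taken from its sign bit.  */
      return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) v);
    }
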
+ /* AVX512BW. */
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512vl_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512vl_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512vl_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
+ { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
};
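To illustrate how a bdesc_args row ties a builtin name to an insn pattern, here is a minimal sketch (not part of the patch) that exercises the CODE_FOR_sminv32qi3_mask entry above; masked_min is an illustrative name and -mavx512bw -mavx512vl are assumed:

    #include <immintrin.h>

    __m256i
    masked_min (__m256i a, __m256i b, __m256i src, __mmask32 m)
    {
      /* V32QI_FTYPE_V32QI_V32QI_V32QI_SI: per-byte min, merged into src
         under the writemask.  */
      return (__m256i) __builtin_ia32_pminsb256_mask ((__v32qi) a,
                                                      (__v32qi) b,
                                                      (__v32qi) src,
                                                      (__mmask32) m);
    }
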
/* Builtins with rounding support. */
@@ -30245,7 +32339,7 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
@@ -30281,8 +32375,8 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
@@ -30363,6 +32457,24 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+
+ /* AVX512DQ. */
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
};
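Entries in bdesc_round_args carry one extra trailing INT for the rounding/SAE immediate; a minimal sketch (not part of the patch) against the CVTPD2UDQ512 entry, with to_udq an illustrative name and -mavx512f assumed:

    #include <immintrin.h>

    __m256i
    to_udq (__m512d x)
    {
      /* V8SI_FTYPE_V8DF_V8SI_QI_INT: the last operand selects the
         rounding mode.  */
      return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) x,
                                                         (__v8si)
                                                         _mm256_setzero_si256 (),
                                                         (__mmask8) -1,
                                                         _MM_FROUND_CUR_DIRECTION);
    }
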
/* FMA4 and XOP. */
@@ -31070,6 +33182,151 @@ ix86_init_mmx_sse_builtins (void)
VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
IX86_BUILTIN_SCATTERDIV8DI);
+ /* AVX512VL. */
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
+ V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
+ V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
+ V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
+ V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
+ V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
+ V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
+ V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV2DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
+ V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
+ V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV2DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
+ V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
+ V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
+ V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
+ V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
+ V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTSIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
+ V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTDIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
+ V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTSIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
+ V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTDIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
+ VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
+ IX86_BUILTIN_SCATTERSIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
+ VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
+ IX86_BUILTIN_SCATTERSIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
+ VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
+ IX86_BUILTIN_SCATTERSIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
+ VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
+ IX86_BUILTIN_SCATTERSIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
+ VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
+ IX86_BUILTIN_SCATTERDIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
+ VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
+ IX86_BUILTIN_SCATTERDIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
+ VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
+ IX86_BUILTIN_SCATTERDIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
+ VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
+ IX86_BUILTIN_SCATTERDIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
+ VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
+ IX86_BUILTIN_SCATTERSIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
+ VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
+ IX86_BUILTIN_SCATTERSIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
+ VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
+ IX86_BUILTIN_SCATTERSIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
+ VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
+ IX86_BUILTIN_SCATTERSIV2DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
+ VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
+ IX86_BUILTIN_SCATTERDIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
+ VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
+ IX86_BUILTIN_SCATTERDIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
+ VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
+ IX86_BUILTIN_SCATTERDIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
+ VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
+ IX86_BUILTIN_SCATTERDIV2DI);
+
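A minimal sketch (not part of the patch) of calling one of the new 256-bit masked gathers declared above; gather_pd is an illustrative name, -mavx512vl is assumed, and the scale operand must be a constant 1, 2, 4 or 8:

    #include <immintrin.h>

    __m256d
    gather_pd (__m256d src, double const *base, __m128i idx, __mmask8 m)
    {
      /* V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT: elements whose mask bit is
         clear keep their value from src.  */
      return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) src, base,
                                                     (__v4si) idx, m, 8);
    }
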
/* AVX512PF */
def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
@@ -33632,6 +35889,28 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4DI_FTYPE_V4SI:
case V4DI_FTYPE_V2DI:
case HI_FTYPE_HI:
+ case HI_FTYPE_V16QI:
+ case SI_FTYPE_V32QI:
+ case DI_FTYPE_V64QI:
+ case V16QI_FTYPE_HI:
+ case V32QI_FTYPE_SI:
+ case V64QI_FTYPE_DI:
+ case V8HI_FTYPE_QI:
+ case V16HI_FTYPE_HI:
+ case V32HI_FTYPE_SI:
+ case V4SI_FTYPE_QI:
+ case V8SI_FTYPE_QI:
+ case V4SI_FTYPE_HI:
+ case V8SI_FTYPE_HI:
+ case QI_FTYPE_V8HI:
+ case HI_FTYPE_V16HI:
+ case SI_FTYPE_V32HI:
+ case QI_FTYPE_V4SI:
+ case QI_FTYPE_V8SI:
+ case HI_FTYPE_V16SI:
+ case QI_FTYPE_V2DI:
+ case QI_FTYPE_V4DI:
+ case QI_FTYPE_V8DI:
case UINT_FTYPE_V2DF:
case UINT_FTYPE_V4SF:
case UINT64_FTYPE_V2DF:
@@ -33639,6 +35918,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16QI_FTYPE_V8DI:
case V16HI_FTYPE_V16SI:
case V16SI_FTYPE_HI:
+ case V2DI_FTYPE_QI:
+ case V4DI_FTYPE_QI:
case V16SI_FTYPE_V16SI:
case V16SI_FTYPE_INT:
case V16SF_FTYPE_FLOAT:
@@ -33647,7 +35928,6 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8HI_FTYPE_V8DI:
case V8UHI_FTYPE_V8UHI:
case V8SI_FTYPE_V8DI:
- case V8USI_FTYPE_V8USI:
case V8SF_FTYPE_V8DF:
case V8DI_FTYPE_QI:
case V8DI_FTYPE_INT64:
@@ -33727,6 +36007,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4DI_FTYPE_V8SI_V8SI:
case V4UDI_FTYPE_V8USI_V8USI:
case QI_FTYPE_V8DI_V8DI:
+ case V8DI_FTYPE_V64QI_V64QI:
case HI_FTYPE_V16SI_V16SI:
if (comparison == UNKNOWN)
return ix86_expand_binop_builtin (icode, exp, target);
@@ -33766,6 +36047,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case UINT16_FTYPE_UINT16_INT:
case UINT8_FTYPE_UINT8_INT:
case HI_FTYPE_HI_HI:
+ case SI_FTYPE_SI_SI:
+ case DI_FTYPE_DI_DI:
case V16SI_FTYPE_V8DF_V8DF:
nargs = 2;
break;
@@ -33779,6 +36062,11 @@ ix86_expand_args_builtin (const struct builtin_description *d,
rmode = V2TImode;
nargs_constant = 1;
break;
+ case V8DI_FTYPE_V8DI_INT_CONVERT:
+ nargs = 2;
+ rmode = V4TImode;
+ nargs_constant = 1;
+ break;
case V8HI_FTYPE_V8HI_INT:
case V8HI_FTYPE_V8SF_INT:
case V16HI_FTYPE_V16SF_INT:
@@ -33804,6 +36092,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DI_FTYPE_V4DI_INT:
case V4DI_FTYPE_V8DI_INT:
case HI_FTYPE_HI_INT:
+ case QI_FTYPE_V4SF_INT:
+ case QI_FTYPE_V2DF_INT:
nargs = 2;
nargs_constant = 1;
break;
@@ -33828,20 +36118,118 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16SI_FTYPE_V16HI_V16SI_HI:
case V16SI_FTYPE_V16QI_V16SI_HI:
case V16SI_FTYPE_V16SF_V16SI_HI:
+ case V8SF_FTYPE_V4SF_V8SF_QI:
+ case V4DF_FTYPE_V2DF_V4DF_QI:
+ case V8SI_FTYPE_V4SI_V8SI_QI:
+ case V8SI_FTYPE_SI_V8SI_QI:
+ case V4SI_FTYPE_V4SI_V4SI_QI:
+ case V4SI_FTYPE_SI_V4SI_QI:
+ case V4DI_FTYPE_V2DI_V4DI_QI:
+ case V4DI_FTYPE_DI_V4DI_QI:
+ case V2DI_FTYPE_V2DI_V2DI_QI:
+ case V2DI_FTYPE_DI_V2DI_QI:
+ case V64QI_FTYPE_V64QI_V64QI_DI:
+ case V64QI_FTYPE_V16QI_V64QI_DI:
+ case V64QI_FTYPE_QI_V64QI_DI:
+ case V32QI_FTYPE_V32QI_V32QI_SI:
+ case V32QI_FTYPE_V16QI_V32QI_SI:
+ case V32QI_FTYPE_QI_V32QI_SI:
+ case V16QI_FTYPE_V16QI_V16QI_HI:
+ case V16QI_FTYPE_QI_V16QI_HI:
+ case V32HI_FTYPE_V8HI_V32HI_SI:
+ case V32HI_FTYPE_HI_V32HI_SI:
+ case V16HI_FTYPE_V8HI_V16HI_HI:
+ case V16HI_FTYPE_HI_V16HI_HI:
+ case V8HI_FTYPE_V8HI_V8HI_QI:
+ case V8HI_FTYPE_HI_V8HI_QI:
+ case V8SF_FTYPE_V8HI_V8SF_QI:
+ case V4SF_FTYPE_V8HI_V4SF_QI:
+ case V8SI_FTYPE_V8SF_V8SI_QI:
+ case V4SI_FTYPE_V4SF_V4SI_QI:
+ case V8DI_FTYPE_V8SF_V8DI_QI:
+ case V4DI_FTYPE_V4SF_V4DI_QI:
+ case V2DI_FTYPE_V4SF_V2DI_QI:
+ case V8SF_FTYPE_V8DI_V8SF_QI:
+ case V4SF_FTYPE_V4DI_V4SF_QI:
+ case V4SF_FTYPE_V2DI_V4SF_QI:
+ case V8DF_FTYPE_V8DI_V8DF_QI:
+ case V4DF_FTYPE_V4DI_V4DF_QI:
+ case V2DF_FTYPE_V2DI_V2DF_QI:
+ case V16QI_FTYPE_V8HI_V16QI_QI:
+ case V16QI_FTYPE_V16HI_V16QI_HI:
+ case V16QI_FTYPE_V4SI_V16QI_QI:
+ case V16QI_FTYPE_V8SI_V16QI_QI:
+ case V8HI_FTYPE_V4SI_V8HI_QI:
+ case V8HI_FTYPE_V8SI_V8HI_QI:
+ case V16QI_FTYPE_V2DI_V16QI_QI:
+ case V16QI_FTYPE_V4DI_V16QI_QI:
+ case V8HI_FTYPE_V2DI_V8HI_QI:
+ case V8HI_FTYPE_V4DI_V8HI_QI:
+ case V4SI_FTYPE_V2DI_V4SI_QI:
+ case V4SI_FTYPE_V4DI_V4SI_QI:
+ case V32QI_FTYPE_V32HI_V32QI_SI:
+ case HI_FTYPE_V16QI_V16QI_HI:
+ case SI_FTYPE_V32QI_V32QI_SI:
+ case DI_FTYPE_V64QI_V64QI_DI:
+ case QI_FTYPE_V8HI_V8HI_QI:
+ case HI_FTYPE_V16HI_V16HI_HI:
+ case SI_FTYPE_V32HI_V32HI_SI:
+ case QI_FTYPE_V4SI_V4SI_QI:
+ case QI_FTYPE_V8SI_V8SI_QI:
+ case QI_FTYPE_V2DI_V2DI_QI:
+ case QI_FTYPE_V4DI_V4DI_QI:
+ case V4SF_FTYPE_V2DF_V4SF_QI:
+ case V4SF_FTYPE_V4DF_V4SF_QI:
+      nargs = 3;
+      break;
case V16SI_FTYPE_V16SI_V16SI_HI:
case V16SI_FTYPE_V16SI_V16SI_V16SI:
case V16SI_FTYPE_V4SI_V16SI_HI:
case V2DI_FTYPE_V2DI_V2DI_V2DI:
+ case V2DI_FTYPE_V4SI_V2DI_QI:
+ case V2DI_FTYPE_V8HI_V2DI_QI:
+ case V2DI_FTYPE_V16QI_V2DI_QI:
+ case V4DI_FTYPE_V4DI_V4DI_QI:
+ case V4DI_FTYPE_V4SI_V4DI_QI:
+ case V4DI_FTYPE_V8HI_V4DI_QI:
+ case V4DI_FTYPE_V16QI_V4DI_QI:
+ case V8DI_FTYPE_V8DF_V8DI_QI:
+ case V4DI_FTYPE_V4DF_V4DI_QI:
+ case V2DI_FTYPE_V2DF_V2DI_QI:
+ case V4SI_FTYPE_V4DF_V4SI_QI:
+ case V4SI_FTYPE_V2DF_V4SI_QI:
+ case V4SI_FTYPE_V8HI_V4SI_QI:
+ case V4SI_FTYPE_V16QI_V4SI_QI:
+ case V8SI_FTYPE_V8SI_V8SI_V8SI:
case V4DI_FTYPE_V4DI_V4DI_V4DI:
case V8DF_FTYPE_V2DF_V8DF_QI:
case V8DF_FTYPE_V4DF_V8DF_QI:
case V8DF_FTYPE_V8DF_V8DF_QI:
case V8DF_FTYPE_V8DF_V8DF_V8DF:
+ case V8SF_FTYPE_V8SF_V8SF_QI:
+ case V8SF_FTYPE_V8SI_V8SF_QI:
+ case V4DF_FTYPE_V4DF_V4DF_QI:
+ case V4SF_FTYPE_V4SF_V4SF_QI:
+ case V2DF_FTYPE_V2DF_V2DF_QI:
+ case V2DF_FTYPE_V4SF_V2DF_QI:
+ case V2DF_FTYPE_V4SI_V2DF_QI:
+ case V4SF_FTYPE_V4SI_V4SF_QI:
+ case V4DF_FTYPE_V4SF_V4DF_QI:
+ case V4DF_FTYPE_V4SI_V4DF_QI:
+ case V8SI_FTYPE_V8SI_V8SI_QI:
+ case V8SI_FTYPE_V8HI_V8SI_QI:
+ case V8SI_FTYPE_V16QI_V8SI_QI:
case V8DF_FTYPE_V8DF_V8DI_V8DF:
case V8DF_FTYPE_V8DI_V8DF_V8DF:
case V8DF_FTYPE_V8SF_V8DF_QI:
case V8DF_FTYPE_V8SI_V8DF_QI:
case V8DI_FTYPE_DI_V8DI_QI:
+ case V16SF_FTYPE_V8SF_V16SF_HI:
+ case V16SI_FTYPE_V8SI_V16SI_HI:
+ case V16HI_FTYPE_V16HI_V16HI_HI:
+ case V8HI_FTYPE_V16QI_V8HI_QI:
+ case V16HI_FTYPE_V16QI_V16HI_HI:
+ case V32HI_FTYPE_V32HI_V32HI_SI:
+ case V32HI_FTYPE_V32QI_V32HI_SI:
case V8DI_FTYPE_V16QI_V8DI_QI:
case V8DI_FTYPE_V2DI_V8DI_QI:
case V8DI_FTYPE_V4DI_V8DI_QI:
@@ -33903,13 +36291,80 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 3;
nargs_constant = 2;
break;
+ case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
+ nargs = 3;
+ rmode = V8DImode;
+ nargs_constant = 1;
+ break;
+ case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
+ nargs = 5;
+ rmode = V8DImode;
+ mask_pos = 2;
+ nargs_constant = 1;
+ break;
+ case QI_FTYPE_V8DF_INT_QI:
+ case QI_FTYPE_V4DF_INT_QI:
+ case QI_FTYPE_V2DF_INT_QI:
+ case HI_FTYPE_V16SF_INT_HI:
+ case QI_FTYPE_V8SF_INT_QI:
+ case QI_FTYPE_V4SF_INT_QI:
+ nargs = 3;
+ mask_pos = 1;
+ nargs_constant = 1;
+ break;
+ case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
+ nargs = 5;
+ rmode = V4DImode;
+ mask_pos = 2;
+ nargs_constant = 1;
+ break;
+ case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
+ nargs = 5;
+ rmode = V2DImode;
+ mask_pos = 2;
+ nargs_constant = 1;
+ break;
+ case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
+ case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
+ case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
+ case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
+ case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
+ case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
+ case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
+ case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
+ case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
+ case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
+ case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
+ case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
+ case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
+ case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
+ case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
+ case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
+ case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
+ case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
+ case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
+ case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
+ case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
+ case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
+ case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
+ case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
+ case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
+ case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
+ case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
+ case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
+ case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
+ case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
+ case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
+ case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
+ case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
+ case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
@@ -33919,6 +36374,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
+ case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
+ case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
+ case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
+ case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
nargs = 4;
break;
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
@@ -33929,8 +36388,20 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 4;
nargs_constant = 1;
break;
+ case QI_FTYPE_V4DI_V4DI_INT_QI:
+ case QI_FTYPE_V8SI_V8SI_INT_QI:
+ case QI_FTYPE_V4DF_V4DF_INT_QI:
+ case QI_FTYPE_V8SF_V8SF_INT_QI:
+ case QI_FTYPE_V2DI_V2DI_INT_QI:
+ case QI_FTYPE_V4SI_V4SI_INT_QI:
case QI_FTYPE_V2DF_V2DF_INT_QI:
case QI_FTYPE_V4SF_V4SF_INT_QI:
+ case DI_FTYPE_V64QI_V64QI_INT_DI:
+ case SI_FTYPE_V32QI_V32QI_INT_SI:
+ case HI_FTYPE_V16QI_V16QI_INT_HI:
+ case SI_FTYPE_V32HI_V32HI_INT_SI:
+ case HI_FTYPE_V16HI_V16HI_INT_HI:
+ case QI_FTYPE_V8HI_V8HI_INT_QI:
nargs = 4;
mask_pos = 1;
nargs_constant = 1;
@@ -33951,6 +36422,27 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 4;
nargs_constant = 1;
break;
+ case V8SF_FTYPE_V8SF_INT_V8SF_QI:
+ case V4SF_FTYPE_V4SF_INT_V4SF_QI:
+ case V2DF_FTYPE_V4DF_INT_V2DF_QI:
+ case V2DI_FTYPE_V4DI_INT_V2DI_QI:
+ case V8SF_FTYPE_V16SF_INT_V8SF_QI:
+ case V8SI_FTYPE_V16SI_INT_V8SI_QI:
+ case V2DF_FTYPE_V8DF_INT_V2DF_QI:
+ case V2DI_FTYPE_V8DI_INT_V2DI_QI:
+ case V4SF_FTYPE_V8SF_INT_V4SF_QI:
+ case V4SI_FTYPE_V8SI_INT_V4SI_QI:
+ case V8HI_FTYPE_V8SF_INT_V8HI_QI:
+ case V8HI_FTYPE_V4SF_INT_V8HI_QI:
+ case V32HI_FTYPE_V32HI_INT_V32HI_SI:
+ case V16HI_FTYPE_V16HI_INT_V16HI_HI:
+ case V8HI_FTYPE_V8HI_INT_V8HI_QI:
+ case V4DI_FTYPE_V4DI_INT_V4DI_QI:
+ case V2DI_FTYPE_V2DI_INT_V2DI_QI:
+ case V8SI_FTYPE_V8SI_INT_V8SI_QI:
+ case V4SI_FTYPE_V4SI_INT_V4SI_QI:
+ case V4DF_FTYPE_V4DF_INT_V4DF_QI:
+ case V2DF_FTYPE_V2DF_INT_V2DF_QI:
case V8DF_FTYPE_V8DF_INT_V8DF_QI:
case V16SF_FTYPE_V16SF_INT_V16SF_HI:
case V16HI_FTYPE_V16SF_INT_V16HI_HI:
@@ -33974,6 +36466,23 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
+ case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
+ case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
+ case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
+ case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
+ case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
+ case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
+ case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
+ case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
+ case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
+ case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
+ case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
+ case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
+ case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
+ case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
+ case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
+ case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
+ case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
nargs = 5;
mask_pos = 2;
nargs_constant = 1;
@@ -33983,6 +36492,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
+ case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
+ case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
+ case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
+ case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
nargs = 5;
mask_pos = 1;
nargs_constant = 1;
@@ -34040,8 +36556,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
if (!match)
switch (icode)
{
- case CODE_FOR_avx2_inserti128:
- case CODE_FOR_avx2_extracti128:
+ case CODE_FOR_avx_vinsertf128v4di:
+ case CODE_FOR_avx_vextractf128v4di:
error ("the last argument must be an 1-bit immediate");
return const0_rtx;
@@ -34049,6 +36565,14 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx512f_cmpv16si3_mask:
case CODE_FOR_avx512f_ucmpv8di3_mask:
case CODE_FOR_avx512f_ucmpv16si3_mask:
+ case CODE_FOR_avx512vl_cmpv4di3_mask:
+ case CODE_FOR_avx512vl_cmpv8si3_mask:
+ case CODE_FOR_avx512vl_ucmpv4di3_mask:
+ case CODE_FOR_avx512vl_ucmpv8si3_mask:
+ case CODE_FOR_avx512vl_cmpv2di3_mask:
+ case CODE_FOR_avx512vl_cmpv4si3_mask:
+ case CODE_FOR_avx512vl_ucmpv2di3_mask:
+ case CODE_FOR_avx512vl_ucmpv4si3_mask:
error ("the last argument must be a 3-bit immediate");
return const0_rtx;
@@ -34068,14 +36592,27 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_sse4_1_blendps:
case CODE_FOR_avx_blendpd256:
case CODE_FOR_avx_vpermilv4df:
+ case CODE_FOR_avx_vpermilv4df_mask:
case CODE_FOR_avx512f_getmantv8df_mask:
case CODE_FOR_avx512f_getmantv16sf_mask:
+ case CODE_FOR_avx512vl_getmantv8sf_mask:
+ case CODE_FOR_avx512vl_getmantv4df_mask:
+ case CODE_FOR_avx512vl_getmantv4sf_mask:
+ case CODE_FOR_avx512vl_getmantv2df_mask:
+ case CODE_FOR_avx512dq_rangepv8df_mask_round:
+ case CODE_FOR_avx512dq_rangepv16sf_mask_round:
+ case CODE_FOR_avx512dq_rangepv4df_mask:
+ case CODE_FOR_avx512dq_rangepv8sf_mask:
+ case CODE_FOR_avx512dq_rangepv2df_mask:
+ case CODE_FOR_avx512dq_rangepv4sf_mask:
+ case CODE_FOR_avx_shufpd256_mask:
error ("the last argument must be a 4-bit immediate");
return const0_rtx;
case CODE_FOR_sha1rnds4:
case CODE_FOR_sse4_1_blendpd:
case CODE_FOR_avx_vpermilv2df:
+ case CODE_FOR_avx_vpermilv2df_mask:
case CODE_FOR_xop_vpermil2v2df3:
case CODE_FOR_xop_vpermil2v4sf3:
case CODE_FOR_xop_vpermil2v4df3:
@@ -34084,6 +36621,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx512f_vinserti32x4_mask:
case CODE_FOR_avx512f_vextractf32x4_mask:
case CODE_FOR_avx512f_vextracti32x4_mask:
+ case CODE_FOR_sse2_shufpd:
+ case CODE_FOR_sse2_shufpd_mask:
+ case CODE_FOR_avx512dq_shuf_f64x2_mask:
+ case CODE_FOR_avx512dq_shuf_i64x2_mask:
+ case CODE_FOR_avx512vl_shuf_i32x4_mask:
+ case CODE_FOR_avx512vl_shuf_f32x4_mask:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
@@ -34097,6 +36640,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx512f_vinserti64x4_mask:
case CODE_FOR_avx512f_vextractf64x4_mask:
case CODE_FOR_avx512f_vextracti64x4_mask:
+ case CODE_FOR_avx512dq_vinsertf32x8_mask:
+ case CODE_FOR_avx512dq_vinserti32x8_mask:
+ case CODE_FOR_avx512vl_vinsertv4df:
+ case CODE_FOR_avx512vl_vinsertv4di:
+ case CODE_FOR_avx512vl_vinsertv8sf:
+ case CODE_FOR_avx512vl_vinsertv8si:
error ("the last argument must be a 1-bit immediate");
return const0_rtx;
@@ -34401,7 +36950,11 @@ ix86_expand_round_builtin (const struct builtin_description *d,
case V8SF_FTYPE_V8DF_V8SF_QI_INT:
case V8DF_FTYPE_V8DF_V8DF_QI_INT:
case V8SI_FTYPE_V8DF_V8SI_QI_INT:
+ case V8DI_FTYPE_V8DF_V8DI_QI_INT:
+ case V8SF_FTYPE_V8DI_V8SF_QI_INT:
+ case V8DF_FTYPE_V8DI_V8DF_QI_INT:
case V16SF_FTYPE_V16SF_V16SF_HI_INT:
+ case V8DI_FTYPE_V8SF_V8DI_QI_INT:
case V16SF_FTYPE_V16SI_V16SF_HI_INT:
case V16SI_FTYPE_V16SF_V16SI_HI_INT:
case V8DF_FTYPE_V8SF_V8DF_QI_INT:
@@ -34438,6 +36991,8 @@ ix86_expand_round_builtin (const struct builtin_description *d,
nargs_constant = 3;
nargs = 5;
break;
+ case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
+ case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
nargs = 6;
@@ -34476,8 +37031,8 @@ ix86_expand_round_builtin (const struct builtin_description *d,
{
case CODE_FOR_avx512f_getmantv8df_mask_round:
case CODE_FOR_avx512f_getmantv16sf_mask_round:
- case CODE_FOR_avx512f_getmantv2df_round:
- case CODE_FOR_avx512f_getmantv4sf_round:
+ case CODE_FOR_avx512f_vgetmantv2df_round:
+ case CODE_FOR_avx512f_vgetmantv4sf_round:
error ("the immediate argument must be a 4-bit immediate");
return const0_rtx;
case CODE_FOR_avx512f_cmpv8df3_mask_round:
@@ -34629,7 +37184,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
{
case CODE_FOR_sse4_1_movntdqa:
case CODE_FOR_avx2_movntdqa:
- case CODE_FOR_avx512f_movntdqa:
+ case CODE_FOR_avx512dq_movntdqa:
aligned_mem = true;
break;
default:
@@ -34700,7 +37255,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PV8DF_V8DF_QI:
case VOID_FTYPE_PV16SF_V16SF_HI:
case VOID_FTYPE_PV8DI_V8DI_QI:
+ case VOID_FTYPE_PV4DI_V4DI_QI:
+ case VOID_FTYPE_PV2DI_V2DI_QI:
case VOID_FTYPE_PV16SI_V16SI_HI:
+ case VOID_FTYPE_PV8SI_V8SI_QI:
+ case VOID_FTYPE_PV4SI_V4SI_QI:
switch (icode)
{
/* These builtins and instructions require the memory
@@ -34709,6 +37268,14 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx512f_storev16si_mask:
case CODE_FOR_avx512f_storev8df_mask:
case CODE_FOR_avx512f_storev8di_mask:
+ case CODE_FOR_avx512vl_storev8sf_mask:
+ case CODE_FOR_avx512vl_storev8si_mask:
+ case CODE_FOR_avx512vl_storev4df_mask:
+ case CODE_FOR_avx512vl_storev4di_mask:
+ case CODE_FOR_avx512vl_storev4sf_mask:
+ case CODE_FOR_avx512vl_storev4si_mask:
+ case CODE_FOR_avx512vl_storev2df_mask:
+ case CODE_FOR_avx512vl_storev2di_mask:
aligned_mem = true;
break;
default:
@@ -34730,17 +37297,51 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PV16HI_V16SI_HI:
case VOID_FTYPE_PV16QI_V8DI_QI:
case VOID_FTYPE_PV16QI_V16SI_HI:
+ case VOID_FTYPE_PV4SI_V4DI_QI:
+ case VOID_FTYPE_PV4SI_V2DI_QI:
+ case VOID_FTYPE_PV8HI_V4DI_QI:
+ case VOID_FTYPE_PV8HI_V2DI_QI:
+ case VOID_FTYPE_PV8HI_V8SI_QI:
+ case VOID_FTYPE_PV8HI_V4SI_QI:
+ case VOID_FTYPE_PV16QI_V4DI_QI:
+ case VOID_FTYPE_PV16QI_V2DI_QI:
+ case VOID_FTYPE_PV16QI_V8SI_QI:
+ case VOID_FTYPE_PV16QI_V4SI_QI:
+ case VOID_FTYPE_PV8HI_V8HI_QI:
+ case VOID_FTYPE_PV16HI_V16HI_HI:
+ case VOID_FTYPE_PV32HI_V32HI_SI:
+ case VOID_FTYPE_PV16QI_V16QI_HI:
+ case VOID_FTYPE_PV32QI_V32QI_SI:
+ case VOID_FTYPE_PV64QI_V64QI_DI:
+ case VOID_FTYPE_PV4DF_V4DF_QI:
+ case VOID_FTYPE_PV2DF_V2DF_QI:
+ case VOID_FTYPE_PV8SF_V8SF_QI:
+ case VOID_FTYPE_PV4SF_V4SF_QI:
nargs = 2;
klass = store;
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
break;
+ case V4SF_FTYPE_PCV4SF_V4SF_QI:
+ case V8SF_FTYPE_PCV8SF_V8SF_QI:
case V16SF_FTYPE_PCV16SF_V16SF_HI:
+ case V4SI_FTYPE_PCV4SI_V4SI_QI:
+ case V8SI_FTYPE_PCV8SI_V8SI_QI:
case V16SI_FTYPE_PCV16SI_V16SI_HI:
+ case V2DF_FTYPE_PCV2DF_V2DF_QI:
+ case V4DF_FTYPE_PCV4DF_V4DF_QI:
case V8DF_FTYPE_PCV8DF_V8DF_QI:
+ case V2DI_FTYPE_PCV2DI_V2DI_QI:
+ case V4DI_FTYPE_PCV4DI_V4DI_QI:
case V8DI_FTYPE_PCV8DI_V8DI_QI:
case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
case V4SF_FTYPE_PCFLOAT_V4SF_QI:
+ case V8HI_FTYPE_PCV8HI_V8HI_QI:
+ case V16HI_FTYPE_PCV16HI_V16HI_HI:
+ case V32HI_FTYPE_PCV32HI_V32HI_SI:
+ case V16QI_FTYPE_PCV16QI_V16QI_HI:
+ case V32QI_FTYPE_PCV32QI_V32QI_SI:
+ case V64QI_FTYPE_PCV64QI_V64QI_DI:
nargs = 3;
klass = load;
memory = 0;
@@ -34752,6 +37353,20 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx512f_loadv16si_mask:
case CODE_FOR_avx512f_loadv8df_mask:
case CODE_FOR_avx512f_loadv8di_mask:
+ case CODE_FOR_avx512vl_loadv8sf_mask:
+ case CODE_FOR_avx512vl_loadv8si_mask:
+ case CODE_FOR_avx512vl_loadv4df_mask:
+ case CODE_FOR_avx512vl_loadv4di_mask:
+ case CODE_FOR_avx512vl_loadv4sf_mask:
+ case CODE_FOR_avx512vl_loadv4si_mask:
+ case CODE_FOR_avx512vl_loadv2df_mask:
+ case CODE_FOR_avx512vl_loadv2di_mask:
+ case CODE_FOR_avx512bw_loadv64qi_mask:
+ case CODE_FOR_avx512vl_loadv32qi_mask:
+ case CODE_FOR_avx512vl_loadv16qi_mask:
+ case CODE_FOR_avx512bw_loadv32hi_mask:
+ case CODE_FOR_avx512vl_loadv16hi_mask:
+ case CODE_FOR_avx512vl_loadv8hi_mask:
aligned_mem = true;
break;
default:
@@ -35771,6 +38386,66 @@ addcarryx:
case IX86_BUILTIN_GATHER3ALTDIV16SI:
icode = CODE_FOR_avx512f_gatherdiv16si;
goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV2DF:
+ icode = CODE_FOR_avx512vl_gathersiv2df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV4DF:
+ icode = CODE_FOR_avx512vl_gathersiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV2DF:
+ icode = CODE_FOR_avx512vl_gatherdiv2df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV4DF:
+ icode = CODE_FOR_avx512vl_gatherdiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV4SF:
+ icode = CODE_FOR_avx512vl_gathersiv4sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV8SF:
+ icode = CODE_FOR_avx512vl_gathersiv8sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV4SF:
+ icode = CODE_FOR_avx512vl_gatherdiv4sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV8SF:
+ icode = CODE_FOR_avx512vl_gatherdiv8sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV2DI:
+ icode = CODE_FOR_avx512vl_gathersiv2di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV4DI:
+ icode = CODE_FOR_avx512vl_gathersiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV2DI:
+ icode = CODE_FOR_avx512vl_gatherdiv2di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV4DI:
+ icode = CODE_FOR_avx512vl_gatherdiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV4SI:
+ icode = CODE_FOR_avx512vl_gathersiv4si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV8SI:
+ icode = CODE_FOR_avx512vl_gathersiv8si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV4SI:
+ icode = CODE_FOR_avx512vl_gatherdiv4si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV8SI:
+ icode = CODE_FOR_avx512vl_gatherdiv8si;
+ goto gather_gen;
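+      /* The GATHER3ALT* builtins reuse the base gather icodes; the
+	 index operand is adjusted to the right width before expansion
+	 (see the half-register handling below).  */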
+ case IX86_BUILTIN_GATHER3ALTSIV4DF:
+ icode = CODE_FOR_avx512vl_gathersiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTDIV8SF:
+ icode = CODE_FOR_avx512vl_gatherdiv8sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTSIV4DI:
+ icode = CODE_FOR_avx512vl_gathersiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTDIV8SI:
+ icode = CODE_FOR_avx512vl_gatherdiv8si;
+ goto gather_gen;
case IX86_BUILTIN_SCATTERSIV16SF:
icode = CODE_FOR_avx512f_scattersiv16sf;
goto scatter_gen;
@@ -35795,7 +38470,54 @@ addcarryx:
case IX86_BUILTIN_SCATTERDIV8DI:
icode = CODE_FOR_avx512f_scatterdiv8di;
goto scatter_gen;
-
+ case IX86_BUILTIN_SCATTERSIV8SF:
+ icode = CODE_FOR_avx512vl_scattersiv8sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV4SF:
+ icode = CODE_FOR_avx512vl_scattersiv4sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV4DF:
+ icode = CODE_FOR_avx512vl_scattersiv4df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV2DF:
+ icode = CODE_FOR_avx512vl_scattersiv2df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV8SF:
+ icode = CODE_FOR_avx512vl_scatterdiv8sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV4SF:
+ icode = CODE_FOR_avx512vl_scatterdiv4sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV4DF:
+ icode = CODE_FOR_avx512vl_scatterdiv4df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV2DF:
+ icode = CODE_FOR_avx512vl_scatterdiv2df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV8SI:
+ icode = CODE_FOR_avx512vl_scattersiv8si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV4SI:
+ icode = CODE_FOR_avx512vl_scattersiv4si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV4DI:
+ icode = CODE_FOR_avx512vl_scattersiv4di;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV2DI:
+ icode = CODE_FOR_avx512vl_scattersiv2di;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV8SI:
+ icode = CODE_FOR_avx512vl_scatterdiv8si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV4SI:
+ icode = CODE_FOR_avx512vl_scatterdiv4si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV4DI:
+ icode = CODE_FOR_avx512vl_scatterdiv4di;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV2DI:
+ icode = CODE_FOR_avx512vl_scatterdiv2di;
+ goto scatter_gen;
case IX86_BUILTIN_GATHERPFDPD:
icode = CODE_FOR_avx512pf_gatherpfv8sidf;
goto vec_prefetch_gen;
@@ -35859,6 +38581,8 @@ addcarryx:
emit_insn (gen_vec_extract_lo_v16si (half, op2));
op2 = half;
break;
+ case IX86_BUILTIN_GATHER3ALTSIV4DF:
+ case IX86_BUILTIN_GATHER3ALTSIV4DI:
case IX86_BUILTIN_GATHERALTSIV4DF:
case IX86_BUILTIN_GATHERALTSIV4DI:
half = gen_reg_rtx (V4SImode);
@@ -35886,6 +38610,8 @@ addcarryx:
op3 = half;
}
break;
+ case IX86_BUILTIN_GATHER3ALTDIV8SF:
+ case IX86_BUILTIN_GATHER3ALTDIV8SI:
case IX86_BUILTIN_GATHERALTDIV8SF:
case IX86_BUILTIN_GATHERALTDIV8SI:
half = gen_reg_rtx (mode0);
@@ -36021,11 +38747,13 @@ addcarryx:
target = gen_reg_rtx (V8SImode);
emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
break;
+ case IX86_BUILTIN_GATHER3DIV8SF:
case IX86_BUILTIN_GATHERDIV8SF:
if (target == NULL_RTX)
target = gen_reg_rtx (V4SFmode);
emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
break;
+ case IX86_BUILTIN_GATHER3DIV8SI:
case IX86_BUILTIN_GATHERDIV8SI:
if (target == NULL_RTX)
target = gen_reg_rtx (V4SImode);
@@ -36857,28 +39585,52 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
switch (TYPE_MODE (mem_vectype))
{
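+    /* Prefer the AVX512VL gather3 builtins when available; unlike the
+       AVX2 gathers they take the mask in a mask register (QImode)
+       rather than in a vector operand.  */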
case V2DFmode:
- code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
break;
case V4DFmode:
- code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
+ else
+ code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
break;
case V2DImode:
- code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
break;
case V4DImode:
- code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
+ else
+ code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
break;
case V4SFmode:
- code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
break;
case V8SFmode:
- code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
break;
case V4SImode:
- code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
break;
case V8SImode:
- code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
+ if (TARGET_AVX512VL)
+ code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
+ else
+ code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
break;
case V8DFmode:
if (TARGET_AVX512F)
@@ -37381,6 +40133,11 @@ inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
return true;
+  /* Between the mask and general register classes we have moves no
+     wider than word size.  */
+ if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
+ && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
+ return true;
+
/* ??? This is a lie. We do have moves between mmx/general, and for
mmx/sse2. But by saying we need secondary memory we discourage the
register allocator from using the mmx registers unless needed. */
@@ -37686,7 +40443,8 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
if (STACK_REGNO_P (regno))
return VALID_FP_MODE_P (mode);
if (MASK_REGNO_P (regno))
- return VALID_MASK_REG_MODE (mode);
+ return (VALID_MASK_REG_MODE (mode)
+ || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
if (SSE_REGNO_P (regno))
{
/* We implement the move patterns for all vector modes into and
@@ -37703,6 +40461,15 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
|| VALID_AVX512F_SCALAR_MODE (mode)))
return true;
+      /* TODO: check for QI/HI scalars.  */
+      /* AVX512VL allows xmm16-xmm31 for 128/256-bit modes.  */
+ if (TARGET_AVX512VL
+ && (mode == OImode
+ || mode == TImode
+ || VALID_AVX256_REG_MODE (mode)
+ || VALID_AVX512VL_128_REG_MODE (mode)))
+ return true;
+
/* xmm16-xmm31 are only available for AVX-512. */
if (EXT_REX_SSE_REGNO_P (regno))
return false;
@@ -39607,6 +42374,8 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
case V8SFmode:
case V8SImode:
case V2DFmode:
+ case V64QImode:
+ case V32HImode:
case V2DImode:
case V4SFmode:
case V4SImode:
@@ -39637,6 +42406,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
goto widen;
case V8HImode:
+ if (TARGET_AVX512VL)
+ return ix86_vector_duplicate_value (mode, target, val);
+
if (TARGET_SSE2)
{
struct expand_vec_perm_d dperm;
@@ -39667,6 +42439,9 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
goto widen;
case V16QImode:
+ if (TARGET_AVX512VL)
+ return ix86_vector_duplicate_value (mode, target, val);
+
if (TARGET_SSE2)
goto permute;
goto widen;
@@ -39696,16 +42471,19 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
case V16HImode:
case V32QImode:
- {
- enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
- rtx x = gen_reg_rtx (hvmode);
+ if (TARGET_AVX512VL)
+ return ix86_vector_duplicate_value (mode, target, val);
+ else
+ {
+ enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
+ rtx x = gen_reg_rtx (hvmode);
- ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
- gcc_assert (ok);
+ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
+ gcc_assert (ok);
- x = gen_rtx_VEC_CONCAT (mode, x, x);
- emit_insn (gen_rtx_SET (VOIDmode, target, x));
- }
+ x = gen_rtx_VEC_CONCAT (mode, x, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ }
return true;
default:
@@ -40267,8 +43045,9 @@ static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
rtx target, rtx vals)
{
- rtx ops[64], op0, op1;
+ rtx ops[64], op0, op1, op2, op3, op4, op5;
enum machine_mode half_mode = VOIDmode;
+ enum machine_mode quarter_mode = VOIDmode;
int n, i;
switch (mode)
@@ -40319,6 +43098,42 @@ half:
gen_rtx_VEC_CONCAT (mode, op0, op1)));
return;
+ case V64QImode:
+ quarter_mode = V16QImode;
+ half_mode = V32QImode;
+ goto quarter;
+
+ case V32HImode:
+ quarter_mode = V8HImode;
+ half_mode = V16HImode;
+ goto quarter;
+
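+      /* Build a 512-bit QI/HI vector from scalar elements: assemble
+	 four 128-bit quarters with interleaves, concatenate them
+	 pairwise into 256-bit halves, then concatenate the halves.  */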
+quarter:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ op0 = gen_reg_rtx (quarter_mode);
+ op1 = gen_reg_rtx (quarter_mode);
+ op2 = gen_reg_rtx (quarter_mode);
+ op3 = gen_reg_rtx (quarter_mode);
+ op4 = gen_reg_rtx (half_mode);
+ op5 = gen_reg_rtx (half_mode);
+ ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
+ n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op1,
+ &ops [n >> 2], n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op2,
+ &ops [n >> 1], n >> 3);
+ ix86_expand_vector_init_interleave (quarter_mode, op3,
+ &ops [(n >> 1) | (n >> 2)], n >> 3);
+ emit_insn (gen_rtx_SET (VOIDmode, op4,
+ gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
+ emit_insn (gen_rtx_SET (VOIDmode, op5,
+ gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, op4, op5)));
+ return;
+
case V16QImode:
if (!TARGET_SSE4_1)
break;
@@ -40701,6 +43516,49 @@ half:
emit_insn (gen_insert[j][i] (target, target, tmp));
return;
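+
+      /* For 512-bit vectors, set an element by broadcasting VAL into a
+	 temporary and blending it into TARGET under the one-hot mask
+	 1 << ELT.  */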
+ case V8DFmode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
+ force_reg (QImode, GEN_INT (1 << elt))));
+ return;
+ case V8DImode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
+ force_reg (QImode, GEN_INT (1 << elt))));
+ return;
+ case V16SFmode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
+ force_reg (HImode, GEN_INT (1 << elt))));
+ return;
+ case V16SImode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
+ force_reg (HImode, GEN_INT (1 << elt))));
+ return;
+ case V32HImode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
+ force_reg (SImode, GEN_INT (1 << elt))));
+ return;
+ case V64QImode:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
+ force_reg (DImode, GEN_INT (1 << elt))));
+ return;
+
default:
break;
}
@@ -40908,6 +43766,32 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
}
break;
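+
+      /* For 512-bit QI/HI vectors, extract the 256-bit half containing
+	 the element and recurse on it.  */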
+ case V32HImode:
+ if (TARGET_AVX512BW)
+ {
+ tmp = gen_reg_rtx (V16HImode);
+ if (elt < 16)
+ emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 15);
+ return;
+ }
+ break;
+
+ case V64QImode:
+ if (TARGET_AVX512BW)
+ {
+ tmp = gen_reg_rtx (V32QImode);
+ if (elt < 32)
+ emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 31);
+ return;
+ }
+ break;
+
case V16SFmode:
tmp = gen_reg_rtx (V8SFmode);
if (elt < 8)
@@ -41036,6 +43920,8 @@ emit_reduc_half (rtx dest, rtx src, int i)
GEN_INT (i / 2));
}
break;
+ case V64QImode:
+ case V32HImode:
case V16SImode:
case V16SFmode:
case V8DImode:
@@ -42525,7 +45411,12 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
if (d->one_operand_p)
return false;
- if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
+  if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
+      && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
+ ;
+ else if (TARGET_AVX512VL)
+ ;
+ else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
;
else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
;
@@ -42556,12 +45447,18 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
switch (vmode)
{
+ case V8DFmode:
+ case V16SFmode:
case V4DFmode:
case V8SFmode:
case V2DFmode:
case V4SFmode:
case V8HImode:
case V8SImode:
+ case V32HImode:
+ case V64QImode:
+ case V16SImode:
+ case V8DImode:
for (i = 0; i < nelt; ++i)
mask |= (d->perm[i] >= nelt) << i;
break;
@@ -42784,9 +45681,9 @@ static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
unsigned i, nelt, eltsz, mask;
- unsigned char perm[32];
+ unsigned char perm[64];
enum machine_mode vmode = V16QImode;
- rtx rperm[32], vperm, target, op0, op1;
+ rtx rperm[64], vperm, target, op0, op1;
nelt = d->nelt;
@@ -42875,6 +45772,17 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
return false;
}
}
+ else if (GET_MODE_SIZE (d->vmode) == 64)
+ {
+ if (!TARGET_AVX512BW)
+ return false;
+ if (vmode == V64QImode)
+ {
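+	      /* vpshufb shuffles bytes only within 128-bit lanes, so
+		 reject permutations that cross a 16-byte lane boundary
+		 (nelt / 4 == 16 for V64QImode).  */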
+ for (i = 0; i < nelt; ++i)
+ if ((d->perm[i] ^ i) & (nelt / 4))
+ return false;
+ }
+ }
else
return false;
}
@@ -42892,6 +45800,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
mask = 2 * nelt - 1;
else if (vmode == V16QImode)
mask = nelt - 1;
+ else if (vmode == V64QImode)
+ mask = nelt / 4 - 1;
else
mask = nelt / 2 - 1;
@@ -42917,6 +45827,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
else if (vmode == V32QImode)
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
+ else if (vmode == V64QImode)
+ emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
else if (vmode == V8SFmode)
emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
else
@@ -42972,12 +45884,24 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
rtx (*gen) (rtx, rtx) = NULL;
switch (d->vmode)
{
+ case V64QImode:
+      if (TARGET_AVX512BW)
+ gen = gen_avx512bw_vec_dupv64qi;
+ break;
case V32QImode:
gen = gen_avx2_pbroadcastv32qi_1;
break;
+ case V32HImode:
+      if (TARGET_AVX512BW)
+ gen = gen_avx512bw_vec_dupv32hi;
+ break;
case V16HImode:
gen = gen_avx2_pbroadcastv16hi_1;
break;
+ case V16SImode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv16si;
+ break;
case V8SImode:
gen = gen_avx2_pbroadcastv8si_1;
break;
@@ -42987,9 +45911,21 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
case V8HImode:
gen = gen_avx2_pbroadcastv8hi;
break;
+ case V16SFmode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv16sf;
+ break;
case V8SFmode:
gen = gen_avx2_vec_dupv8sf_1;
break;
+ case V8DFmode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv8df;
+ break;
+ case V8DImode:
+ if (TARGET_AVX512F)
+ gen = gen_avx512f_vec_dupv8di;
+ break;
/* For other modes prefer other shuffles this function creates. */
default: break;
}
@@ -43079,6 +46015,14 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
mode = V8DImode;
else if (mode == V16SFmode)
mode = V16SImode;
+ else if (mode == V4DFmode)
+ mode = V4DImode;
+ else if (mode == V2DFmode)
+ mode = V2DImode;
+ else if (mode == V8SFmode)
+ mode = V8SImode;
+ else if (mode == V4SFmode)
+ mode = V4SImode;
for (i = 0; i < nelt; ++i)
vec[i] = GEN_INT (d->perm[i]);
rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
@@ -44622,6 +47566,16 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
/* Try sequences of two instructions. */
+  /* ix86_expand_vec_perm_vpermi2 is also called from ix86_expand_vec_perm,
+     so it does not take D as a parameter.  Construct the arguments it
+     needs here.  */
+ rtx vec[64];
+ int i;
+ for (i = 0; i < d->nelt; ++i)
+ vec[i] = GEN_INT (d->perm[i]);
+ rtx sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, vec));
+ if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, sel, d->op1))
+ return true;
if (expand_vec_perm_pshuflw_pshufhw (d))
return true;
@@ -44796,7 +47750,8 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
/* Given sufficient ISA support we can just return true here
for selected vector modes. */
if (d.vmode == V16SImode || d.vmode == V16SFmode
- || d.vmode == V8DFmode || d.vmode == V8DImode)
+ || d.vmode == V8DFmode || d.vmode == V8DImode
+ || d.vmode == V32HImode || d.vmode == V64QImode)
/* All implementable with a single vpermi2 insn. */
return true;
if (GET_MODE_SIZE (d.vmode) == 16)
@@ -44929,6 +47884,11 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
gen_il = gen_avx2_interleave_lowv32qi;
gen_ih = gen_avx2_interleave_highv32qi;
break;
+ case V64QImode:
+ himode = V32HImode;
+ gen_il = gen_avx512bw_interleave_lowv64qi;
+ gen_ih = gen_avx512bw_interleave_highv64qi;
+ break;
default:
gcc_unreachable ();
}
@@ -44989,7 +47949,7 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
/* For SSE2, we used an full interleave, so the desired
results are in the even elements. */
- for (i = 0; i < 32; ++i)
+ for (i = 0; i < 64; ++i)
d.perm[i] = i * 2;
}
else
@@ -44997,7 +47957,7 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
/* For AVX, the interleave used above was not cross-lane. So the
extraction is evens but with the second and third quarter swapped.
Happily, that is even one insn shorter than even extraction. */
- for (i = 0; i < 32; ++i)
+ for (i = 0; i < 64; ++i)
d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
}
@@ -45195,6 +48155,10 @@ ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
case V16QImode:
case V32QImode:
+      /* TODO: why are V32HImode and V16SImode handled here rather than
+	 in their own cases?  */
+ case V32HImode:
+ case V16SImode:
+ case V64QImode:
t1 = gen_reg_rtx (wmode);
t2 = gen_reg_rtx (wmode);
ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
@@ -45249,7 +48213,13 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
enum machine_mode mode = GET_MODE (op0);
rtx t1, t2, t3, t4, t5, t6;
- if (TARGET_XOP && mode == V2DImode)
+ if (TARGET_AVX512DQ && mode == V8DImode)
+ emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
+ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
+ emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
+ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
+    emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
+ else if (TARGET_XOP && mode == V2DImode)
{
/* op1: A,B,C,D, op2: E,F,G,H */
op1 = gen_lowpart (V4SImode, op1);
@@ -46456,9 +49426,11 @@ ix86_preferred_simd_mode (enum machine_mode mode)
switch (mode)
{
case QImode:
- return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
+ return TARGET_AVX512BW ? V64QImode :
+ (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
case HImode:
- return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
+ return TARGET_AVX512BW ? V32HImode :
+ (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
case SImode:
return TARGET_AVX512F ? V16SImode :
(TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9e3ef9424c3..c2f0ceed45f 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -71,6 +71,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_AVX512ER_P(x) TARGET_ISA_AVX512ER_P(x)
#define TARGET_AVX512CD TARGET_ISA_AVX512CD
#define TARGET_AVX512CD_P(x) TARGET_ISA_AVX512CD_P(x)
+#define TARGET_AVX512DQ TARGET_ISA_AVX512DQ
+#define TARGET_AVX512DQ_P(x) TARGET_ISA_AVX512DQ_P(x)
+#define TARGET_AVX512BW TARGET_ISA_AVX512BW
+#define TARGET_AVX512BW_P(x) TARGET_ISA_AVX512BW_P(x)
+#define TARGET_AVX512VL TARGET_ISA_AVX512VL
+#define TARGET_AVX512VL_P(x) TARGET_ISA_AVX512VL_P(x)
#define TARGET_FMA TARGET_ISA_FMA
#define TARGET_FMA_P(x) TARGET_ISA_FMA_P(x)
#define TARGET_SSE4A TARGET_ISA_SSE4A
@@ -1048,7 +1054,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
applied to them. */
#define HARD_REGNO_NREGS(REGNO, MODE) \
- (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) \
+ || MMX_REGNO_P (REGNO) || MASK_REGNO_P (REGNO) \
? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
: ((MODE) == XFmode \
? (TARGET_64BIT ? 2 : 3) \
@@ -1079,7 +1086,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_AVX512F_REG_MODE(MODE) \
((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \
- || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode)
+ || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \
+ || (MODE) == V4TImode)
+
+#define VALID_AVX512VL_128_REG_MODE(MODE) \
+ ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \
+ || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode)
#define VALID_SSE2_REG_MODE(MODE) \
((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
@@ -1126,6 +1138,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
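+
+/* With AVX512BW, masks may also be 32 or 64 bits wide, moved with
+   kmovd/kmovq.  */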
+#define VALID_MASK_AVX512BW_MODE(MODE) ((MODE) == SImode || (MODE) == DImode)
+
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
#define HARD_REGNO_MODE_OK(REGNO, MODE) \
@@ -1448,6 +1462,7 @@ enum reg_class
: (N) <= LAST_REX_SSE_REG ? (FIRST_REX_SSE_REG + (N) - 8) \
: (FIRST_EXT_REX_SSE_REG + (N) - 16))
+#define MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
#define MASK_REGNO_P(N) IN_RANGE ((N), FIRST_MASK_REG, LAST_MASK_REG)
#define ANY_MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3cb8b672515..2a10862fec8 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -740,7 +740,8 @@
;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64,
sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
- avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f"
+ avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
+ fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq"
(const_string "base"))
(define_attr "enabled" ""
@@ -771,6 +772,10 @@
(eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
(eq_attr "isa" "fma_avx512f")
(symbol_ref "TARGET_FMA || TARGET_AVX512F")
+ (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
+ (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
+ (eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
+ (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
]
(const_int 1)))
@@ -876,6 +881,11 @@
;; Used in signed and unsigned fix.
(define_code_iterator any_fix [fix unsigned_fix])
(define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
+(define_code_attr ufix_bool [(fix "false") (unsigned_fix "true")])
+
+;; Used in signed and unsigned float.
+(define_code_iterator any_float [float unsigned_float])
+(define_code_attr floatsuffix [(float "") (unsigned_float "u")])
;; All integer modes.
(define_mode_iterator SWI1248x [QI HI SI DI])
@@ -952,6 +962,9 @@
;; Instruction suffix for integer modes.
(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+;; Instruction suffix for masks.
+(define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])
+
;; Pointer size prefix for integer modes (Intel asm dialect)
(define_mode_attr iptrsize [(QI "BYTE")
(HI "WORD")
@@ -1048,7 +1061,7 @@
(V4SF "ps") (V2DF "pd")
(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
(V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
- (V64QI "b") (V16SI "d") (V8DI "q")])
+ (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")])
;; SSE vector suffix for floating point modes
(define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")])
@@ -1912,8 +1925,8 @@
(set_attr "mode" "XI")])
(define_insn "*movoi_internal_avx"
- [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x ,m")
- (match_operand:OI 1 "vector_move_operand" "C ,xm,x"))]
+ [(set (match_operand:OI 0 "nonimmediate_operand" "=v,v ,m")
+ (match_operand:OI 1 "vector_move_operand" "C ,vm,v"))]
"TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -1927,6 +1940,8 @@
{
if (get_attr_mode (insn) == MODE_V8SF)
return "vmovups\t{%1, %0|%0, %1}";
+ else if (get_attr_mode (insn) == MODE_XI)
+ return "vmovdqu32\t{%1, %0|%0, %1}";
else
return "vmovdqu\t{%1, %0|%0, %1}";
}
@@ -1934,6 +1949,8 @@
{
if (get_attr_mode (insn) == MODE_V8SF)
return "vmovaps\t{%1, %0|%0, %1}";
+ else if (get_attr_mode (insn) == MODE_XI)
+ return "vmovdqa32\t{%1, %0|%0, %1}";
else
return "vmovdqa\t{%1, %0|%0, %1}";
}
@@ -1945,7 +1962,10 @@
[(set_attr "type" "sselog1,ssemov,ssemov")
(set_attr "prefix" "vex")
(set (attr "mode")
- (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (cond [(ior (match_operand 0 "ext_sse_reg_operand")
+ (match_operand 1 "ext_sse_reg_operand"))
+ (const_string "XI")
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V8SF")
(and (eq_attr "alternative" "2")
(match_test "TARGET_SSE_TYPELESS_STORES"))
@@ -1954,8 +1974,8 @@
(const_string "OI")))])
(define_insn "*movti_internal"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,x,x ,m")
- (match_operand:TI 1 "general_operand" "riFo,re,C,xm,x"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,m")
+ (match_operand:TI 1 "general_operand" "riFo,re,C,vm,v"))]
"(TARGET_64BIT || TARGET_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
@@ -1975,6 +1995,8 @@
{
if (get_attr_mode (insn) == MODE_V4SF)
return "%vmovups\t{%1, %0|%0, %1}";
+ else if (get_attr_mode (insn) == MODE_XI)
+ return "vmovdqu32\t{%1, %0|%0, %1}";
else
return "%vmovdqu\t{%1, %0|%0, %1}";
}
@@ -1982,6 +2004,8 @@
{
if (get_attr_mode (insn) == MODE_V4SF)
return "%vmovaps\t{%1, %0|%0, %1}";
+ else if (get_attr_mode (insn) == MODE_XI)
+ return "vmovdqa32\t{%1, %0|%0, %1}";
else
return "%vmovdqa\t{%1, %0|%0, %1}";
}
@@ -1997,7 +2021,10 @@
(const_string "maybe_vex")
(const_string "orig")))
(set (attr "mode")
- (cond [(eq_attr "alternative" "0,1")
+ (cond [(ior (match_operand 0 "ext_sse_reg_operand")
+ (match_operand 1 "ext_sse_reg_operand"))
+ (const_string "XI")
+ (eq_attr "alternative" "0,1")
(const_string "DI")
(ior (not (match_test "TARGET_SSE2"))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
@@ -2022,13 +2049,16 @@
(define_insn "*movdi_internal"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi")
+ "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r ,*m")
(match_operand:DI 1 "general_operand"
- "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn"))]
+ "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn ,*r ,*km,*k,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
{
+ case TYPE_MSKMOV:
+ return "kmovq\t{%1, %0|%0, %1}";
+
case TYPE_MULTI:
return "#";
@@ -2099,7 +2129,7 @@
[(set (attr "isa")
(cond [(eq_attr "alternative" "0,1")
(const_string "nox64")
- (eq_attr "alternative" "2,3,4,5,10,11,16,18")
+ (eq_attr "alternative" "2,3,4,5,10,11,16,18,21,23")
(const_string "x64")
(eq_attr "alternative" "17")
(const_string "x64_sse4")
@@ -2118,6 +2148,8 @@
(const_string "ssemov")
(eq_attr "alternative" "19,20")
(const_string "ssecvt")
+ (eq_attr "alternative" "21,22,23,24")
+ (const_string "mskmov")
(match_operand 1 "pic_32bit_operand")
(const_string "lea")
]
@@ -2179,16 +2211,20 @@
[(set (match_operand:DI 0 "nonimmediate_operand")
(match_operand:DI 1 "general_operand"))]
"!TARGET_64BIT && reload_completed
- && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))
- && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
+ && !(MMX_REG_P (operands[0])
+ || SSE_REG_P (operands[0])
+ || MASK_REG_P (operands[0]))
+ && !(MMX_REG_P (operands[1])
+ || SSE_REG_P (operands[1])
+ || MASK_REG_P (operands[1]))"
[(const_int 0)]
"ix86_split_long_move (operands); DONE;")
(define_insn "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi")
+ "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm")
(match_operand:SI 1 "general_operand"
- "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))]
+ "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2199,6 +2235,9 @@
return standard_sse_constant_opcode (insn, operands[1]);
+ case TYPE_MSKMOV:
+ return "kmovd\t{%1, %0|%0, %1}";
+
case TYPE_SSEMOV:
switch (get_attr_mode (insn))
{
@@ -2262,6 +2301,8 @@
(const_string "sselog1")
(eq_attr "alternative" "7,8,9,10,12")
(const_string "ssemov")
+ (eq_attr "alternative" "13,14")
+ (const_string "mskmov")
(match_operand 1 "pic_32bit_operand")
(const_string "lea")
]
@@ -2410,9 +2451,12 @@
case TYPE_MSKMOV:
switch (which_alternative)
{
- case 7: return "kmovw\t{%k1, %0|%0, %k1}";
- case 8: return "kmovw\t{%1, %0|%0, %1}";
- case 9: return "kmovw\t{%1, %k0|%k0, %1}";
+ case 7: return TARGET_AVX512BW ? "kmovb\t{%k1, %0|%0, %k1}"
+ : "kmovw\t{%k1, %0|%0, %k1}";
+ case 8: return TARGET_AVX512BW ? "kmovb\t{%1, %0|%0, %1}"
+ : "kmovw\t{%1, %0|%0, %1}";
+ case 9: return TARGET_AVX512BW ? "kmovb\t{%1, %k0|%k0, %1}"
+ : "kmovw\t{%1, %k0|%k0, %1}";
default: gcc_unreachable ();
}
@@ -7490,21 +7534,45 @@
})
(define_split
- [(set (match_operand:SWI12 0 "mask_reg_operand")
- (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand")
- (match_operand:SWI12 2 "mask_reg_operand")))
+ [(set (match_operand:SWI1248x 0 "mask_reg_operand")
+ (any_logic:SWI1248x (match_operand:SWI1248x 1 "mask_reg_operand")
+ (match_operand:SWI1248x 2 "mask_reg_operand")))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_AVX512F && reload_completed"
+;; TODO: the TARGET_AVX512F check was dropped because mask_reg_operand
+;; already implies it.
+ "reload_completed"
[(set (match_dup 0)
- (any_logic:SWI12 (match_dup 1)
- (match_dup 2)))])
+ (any_logic:SWI1248x (match_dup 1)
+ (match_dup 2)))])
-(define_insn "*k<logic><mode>"
- [(set (match_operand:SWI12 0 "mask_reg_operand" "=k")
- (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand" "k")
- (match_operand:SWI12 2 "mask_reg_operand" "k")))]
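+
+;; Without AVX512DQ there is no byte form of the mask logic
+;; instructions, so QImode masks fall back to the word form; the
+;; upper bits of the result are don't-care.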
+(define_insn "*k<logic>qi"
+ [(set (match_operand:QI 0 "mask_reg_operand" "=k")
+ (any_logic:QI (match_operand:QI 1 "mask_reg_operand" "k")
+ (match_operand:QI 2 "mask_reg_operand" "k")))]
+ "TARGET_AVX512F"
+{
+ return TARGET_AVX512DQ ? "k<logic>b\t{%2, %1, %0|%0, %1, %2}"
+ : "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "mode" "QI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+(define_insn "*k<logic>hi"
+ [(set (match_operand:HI 0 "mask_reg_operand" "=k")
+ (any_logic:HI (match_operand:HI 1 "mask_reg_operand" "k")
+ (match_operand:HI 2 "mask_reg_operand" "k")))]
"TARGET_AVX512F"
"k<logic>w\t{%2, %1, %0|%0, %1, %2}";
+ [(set_attr "mode" "HI")
+ (set_attr "type" "msklog")
+ (set_attr "prefix" "vex")])
+
+(define_insn "*k<logic><mode>"
+ [(set (match_operand:SWI48x 0 "mask_reg_operand" "=k")
+ (any_logic:SWI48x (match_operand:SWI48x 1 "mask_reg_operand" "k")
+ (match_operand:SWI48x 2 "mask_reg_operand" "k")))]
+ "TARGET_AVX512BW"
+ "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
[(set_attr "mode" "<MODE>")
(set_attr "type" "msklog")
(set_attr "prefix" "vex")])
@@ -7560,10 +7628,10 @@
})
(define_insn "*anddi_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,!k")
(and:DI
- (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
- (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
+ (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L,k")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
{
@@ -7572,6 +7640,9 @@
case TYPE_IMOVX:
return "#";
+ case TYPE_MSKLOG:
+ return "kandq\t{%2, %1, %0|%0, %1, %2}";
+
default:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
if (get_attr_mode (insn) == MODE_SI)
@@ -7580,8 +7651,8 @@
return "and{q}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "type" "alu,alu,alu,imovx")
- (set_attr "length_immediate" "*,*,*,0")
+ [(set_attr "type" "alu,alu,alu,imovx,msklog")
+ (set_attr "length_immediate" "*,*,*,0,0")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -7589,12 +7660,12 @@
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "SI,DI,DI,SI")])
+ (set_attr "mode" "SI,DI,DI,SI,DI")])
(define_insn "*andsi_1"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya")
- (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
- (match_operand:SI 2 "x86_64_general_operand" "re,rm,L")))
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya,!k")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm,k")
+ (match_operand:SI 2 "x86_64_general_operand" "re,rm,L,k")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (AND, SImode, operands)"
{
@@ -7603,12 +7674,15 @@
case TYPE_IMOVX:
return "#";
+ case TYPE_MSKLOG:
+ return "kandd\t{%2, %1, %0|%0, %1, %2}";
+
default:
gcc_assert (rtx_equal_p (operands[0], operands[1]));
return "and{l}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "type" "alu,alu,imovx")
+ [(set_attr "type" "alu,alu,imovx,msklog")
(set (attr "prefix_rex")
(if_then_else
(and (eq_attr "type" "imovx")
@@ -7616,7 +7690,7 @@
(match_operand 1 "ext_QIreg_operand")))
(const_string "1")
(const_string "*")))
- (set_attr "length_immediate" "*,*,0")
+ (set_attr "length_immediate" "*,*,0,0")
(set_attr "mode" "SI")])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
@@ -7668,11 +7742,21 @@
(match_operand:QI 2 "general_operand" "qn,qmn,rn,k")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (AND, QImode, operands)"
- "@
- and{b}\t{%2, %0|%0, %2}
- and{b}\t{%2, %0|%0, %2}
- and{l}\t{%k2, %k0|%k0, %k2}
- kandw\t{%2, %1, %0|%0, %1, %2}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "and{b}\t{%2, %0|%0, %2}";
+ case 2:
+ return "and{l}\t{%k2, %k0|%k0, %k2}";
+ case 3:
+ return TARGET_AVX512DQ ? "kandb\t{%2, %1, %0|%0, %1, %2}"
+ : "kandw\t{%2, %1, %0|%0, %1, %2}";
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "type" "alu,alu,alu,msklog")
(set_attr "mode" "QI,QI,SI,HI")])
@@ -7695,10 +7779,22 @@
(match_operand:SWI12 2 "register_operand" "r,r,k")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_AVX512F"
- "@
- andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
- #
- kandnw\t{%2, %1, %0|%0, %1, %2}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "andn\t{%k2, %k1, %k0|%k0, %k1, %k2}";
+ case 1:
+ return "#";
+ case 2:
+ if (TARGET_AVX512DQ && <MODE>mode == QImode)
+ return "kandnb\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "kandnw\t{%2, %1, %0|%0, %1, %2}";
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "isa" "bmi,*,avx512f")
(set_attr "type" "bitmanip,*,msklog")
(set_attr "prefix" "*,*,vex")
@@ -8062,14 +8158,17 @@
"ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
(define_insn "*<code><mode>_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm")
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,k")
(any_or:SWI48
- (match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
- (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>")))
+ (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,k")
+ (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>,k")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
- [(set_attr "type" "alu")
+ "@
+ <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ <logic>{<imodesuffix>}\t{%2, %0|%0, %2}
+ k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "alu,alu,msklog")
(set_attr "mode" "<MODE>")])
(define_insn "*<code>hi_1"
@@ -8157,19 +8256,36 @@
(match_operand:SWI12 2 "register_operand" "r,k"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_AVX512F"
+{
+  if (which_alternative == 0)
+    return "#";
+  if (TARGET_AVX512DQ && <MODE>mode == QImode)
+    return "kxnorb\t{%2, %1, %0|%0, %1, %2}";
+  return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "*,msklog")
+ (set_attr "prefix" "*,vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "kxnor<mode>"
+ [(set (match_operand:SWI48x 0 "register_operand" "=r,!k")
+ (not:SWI48x
+ (xor:SWI48x
+ (match_operand:SWI48x 1 "register_operand" "0,k")
+ (match_operand:SWI48x 2 "register_operand" "r,k"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW"
"@
#
- kxnorw\t{%2, %1, %0|%0, %1, %2}"
+ kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "*,msklog")
(set_attr "prefix" "*,vex")
(set_attr "mode" "<MODE>")])
(define_split
- [(set (match_operand:SWI12 0 "general_reg_operand")
- (not:SWI12
- (xor:SWI12
+ [(set (match_operand:SWI1248x 0 "general_reg_operand")
+ (not:SWI1248x
+ (xor:SWI1248x
(match_dup 0)
- (match_operand:SWI12 1 "general_reg_operand"))))
+ (match_operand:SWI1248x 1 "general_reg_operand"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_AVX512F && reload_completed"
[(parallel [(set (match_dup 0)
@@ -8179,6 +8295,8 @@
(set (match_dup 0)
(not:HI (match_dup 0)))])
+;; There are kortest[bdq] instructions but no intrinsics for them,
+;; so we probably don't need to implement them.
(define_insn "kortestzhi"
[(set (reg:CCZ FLAGS_REG)
(compare:CCZ
@@ -8218,6 +8336,28 @@
(set_attr "type" "msklog")
(set_attr "prefix" "vex")])
+(define_insn "kunpcksi"
+ [(set (match_operand:SI 0 "register_operand" "=k")
+ (ior:SI
+ (ashift:SI
+ (match_operand:SI 1 "register_operand" "k")
+ (const_int 16))
+ (zero_extend:SI (subreg:HI (match_operand:SI 2 "register_operand" "k") 0))))]
+ "TARGET_AVX512BW"
+ "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "SI")])
+
+(define_insn "kunpckdi"
+ [(set (match_operand:DI 0 "register_operand" "=k")
+ (ior:DI
+ (ashift:DI
+ (match_operand:DI 1 "register_operand" "k")
+ (const_int 32))
+ (zero_extend:DI (subreg:SI (match_operand:DI 2 "register_operand" "k") 0))))]
+ "TARGET_AVX512BW"
+ "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "mode" "DI")])
+
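The kunpck patterns concatenate two narrower masks into one wider mask: the first
operand is shifted left by the source width and the zero-extended low half of the
second operand is ORed in. A C model of kunpckdq under that reading (kunpckwd is
the same shape with 16-bit halves; sketch only, not part of the patch):

  /* Model of kunpckdq: result = (src1 << 32) | low-32-bits (src2).  */
  unsigned long long
  kunpckdq_model (unsigned long long src1, unsigned long long src2)
  {
    return (src1 << 32) | (src2 & 0xffffffffULL);
  }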
;; See comment for addsi_1_zext why we do use nonimmediate_operand
;; ??? Special case for immediate operand is missing - it is tricky.
(define_insn "*<code>si_2_zext"
@@ -8817,11 +8957,15 @@
"ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
(define_insn "*one_cmpl<mode>2_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
- (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))]
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,k")
+ (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,k")))]
"ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
- "not{<imodesuffix>}\t%0"
- [(set_attr "type" "negnot")
+ "@
+ not{<imodesuffix>}\t%0
+ knot<mskmodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,avx512bw")
+ (set_attr "type" "negnot,msklog")
+ (set_attr "prefix" "*,vex")
(set_attr "mode" "<MODE>")])
(define_insn "*one_cmplhi2_1"
@@ -8841,10 +8985,21 @@
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!k")
(not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
"ix86_unary_operator_ok (NOT, QImode, operands)"
- "@
- not{b}\t%0
- not{l}\t%k0
- knotw\t{%1, %0|%0, %1}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "not{b}\t%0";
+ case 1:
+ return "not{l}\t%k0";
+ case 2:
+ if (TARGET_AVX512DQ)
+ return "knotb\t{%1, %0|%0, %1}";
+ return "knotw\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "isa" "*,*,avx512f")
(set_attr "type" "negnot,negnot,msklog")
(set_attr "prefix" "*,*,vex")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index dc1302c5a01..9208b766030 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -641,6 +641,18 @@ mavx512cd
Target Report Mask(ISA_AVX512CD) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512CD built-in functions and code generation
+mavx512dq
+Target Report Mask(ISA_AVX512DQ) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512DQ built-in functions and code generation
+
+mavx512bw
+Target Report Mask(ISA_AVX512BW) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512BW built-in functions and code generation
+
+mavx512vl
+Target Report Mask(ISA_AVX512VL) Var(ix86_isa_flags) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512VL built-in functions and code generation
+
mfma
Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation
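Besides the -mavx512bw/-mavx512dq/-mavx512vl command-line flags defined above, the
same ISA names can be requested per function through GCC's target attribute; a
small hedged example (the function name is hypothetical):

  /* Enable the new extensions for one function only; the attribute
     strings match the .opt entries above without the "m" prefix.  */
  __attribute__ ((target ("avx512bw,avx512dq,avx512vl")))
  void
  hypothetical_kernel (void)
  {
  }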
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 73b48599277..5d921822248 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -50,6 +50,16 @@
#include <avx512cdintrin.h>
+#include <avx512vlintrin.h>
+
+#include <avx512bwintrin.h>
+
+#include <avx512dqintrin.h>
+
+#include <avx512vlbwintrin.h>
+
+#include <avx512vldqintrin.h>
+
#include <shaintrin.h>
#include <lzcntintrin.h>
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f667362b6f7..a0736e812f5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -128,6 +128,21 @@
UNSPEC_SHA256MSG1
UNSPEC_SHA256MSG2
UNSPEC_SHA256RNDS2
+
+ ;; For AVX512BW support
+ UNSPEC_DBPSADBW
+ UNSPEC_PMADDUBSW512
+ UNSPEC_PMADDWD512
+ UNSPEC_PSHUFHW
+ UNSPEC_PSHUFLW
+ UNSPEC_CVTINT2MASK
+
+ ;; For AVX512DQ support
+ UNSPEC_REDUCE
+ UNSPEC_FPCLASS
+ UNSPEC_FPCLASS_SCALAR
+ UNSPEC_RANGE
+ UNSPEC_RANGE_SCALAR
])
(define_c_enum "unspecv" [
@@ -146,10 +161,21 @@
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
- (V2TI "TARGET_AVX") V1TI
+ (V4TI "TARGET_AVX") (V2TI "TARGET_AVX") V1TI
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
+;; All AVX512VL vector modes
+(define_mode_iterator V_AVX512VL
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
;; All vector modes
(define_mode_iterator V
[(V32QI "TARGET_AVX") V16QI
@@ -194,6 +220,9 @@
(define_mode_iterator VF1_128_256
[(V8SF "TARGET_AVX") V4SF])
+(define_mode_iterator VF1_128_256VL
+ [V8SF (V4SF "TARGET_AVX512VL")])
+
;; All DFmode vector float modes
(define_mode_iterator VF2
[(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
@@ -205,6 +234,9 @@
(define_mode_iterator VF2_512_256
[(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
+(define_mode_iterator VF2_512_256VL
+ [V8DF (V4DF "TARGET_AVX512VL")])
+
;; All 128bit vector float modes
(define_mode_iterator VF_128
[V4SF (V2DF "TARGET_SSE2")])
@@ -217,17 +249,33 @@
(define_mode_iterator VF_512
[V16SF V8DF])
+(define_mode_iterator VI_AVX512VL
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL") (V16QI "TARGET_AVX512VL")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
+ V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VF_AVX512VL
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
+(define_mode_iterator VF2_AVX512VL
+ [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
+(define_mode_iterator VF1_AVX512VL
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
+
;; All vector integer modes
(define_mode_iterator VI
[(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
- (V32QI "TARGET_AVX") V16QI
- (V16HI "TARGET_AVX") V8HI
+ (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
(V8SI "TARGET_AVX") V4SI
(V4DI "TARGET_AVX") V2DI])
(define_mode_iterator VI_AVX2
- [(V32QI "TARGET_AVX2") V16QI
- (V16HI "TARGET_AVX2") V8HI
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
@@ -236,18 +284,33 @@
[(V32QI "TARGET_AVX") V16QI])
(define_mode_iterator VI_UNALIGNED_LOADSTORE
- [(V32QI "TARGET_AVX") V16QI
- (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW")
+ (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; All DImode vector integer modes
(define_mode_iterator VI8
[(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
+(define_mode_iterator VI8_AVX512VL
+ [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI8_256_512
+ [V8DI (V4DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI128_256
+ [(V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
+ (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")])
+
(define_mode_iterator VI1_AVX2
- [(V32QI "TARGET_AVX2") V16QI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI2_AVX2
- [(V16HI "TARGET_AVX2") V8HI])
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX512F
[(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
@@ -261,9 +324,20 @@
(define_mode_iterator VI4_AVX512F
[(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
-(define_mode_iterator VI48_AVX512F
- [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F")])
+(define_mode_iterator VI4_AVX512VL
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI248_AVX512
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") (V4SI "TARGET_AVX2")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI48_AVX512VL
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI8_AVX2_AVX512BW
+ [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX2
[(V4DI "TARGET_AVX2") V2DI])
@@ -271,6 +345,12 @@
(define_mode_iterator VI8_AVX2_AVX512F
[(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
+(define_mode_iterator VI4_128_8_256
+ [V4SI V4DI])
+
+(define_mode_iterator VI2_128_4_256
+ [V8HI V8SI])
+
;; All V8D* modes
(define_mode_iterator V8FI
[V8DF V8DI])
@@ -281,26 +361,20 @@
;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2
- [(V2TI "TARGET_AVX2") V1TI])
+ [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
;; ??? This should probably be dropped in favor of VIMAX_AVX2.
(define_mode_iterator SSESCALARMODE
- [(V2TI "TARGET_AVX2") TI])
+ [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
(define_mode_iterator VI12_AVX2
- [(V32QI "TARGET_AVX2") V16QI
- (V16HI "TARGET_AVX2") V8HI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI24_AVX2
- [(V16HI "TARGET_AVX2") V8HI
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI])
-(define_mode_iterator VI124_AVX2_48_AVX512F
- [(V32QI "TARGET_AVX2") V16QI
- (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F")])
-
(define_mode_iterator VI124_AVX512F
[(V32QI "TARGET_AVX2") V16QI
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
@@ -312,7 +386,7 @@
(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI248_AVX2
- [(V16HI "TARGET_AVX2") V8HI
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI
(V4DI "TARGET_AVX2") V2DI])
@@ -321,9 +395,10 @@
(V8SI "TARGET_AVX2") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
-(define_mode_iterator VI48_AVX2_48_AVX512F
- [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
+(define_mode_iterator VI248_AVX512BW
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") (V4SI "TARGET_AVX2")
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") (V2DI "TARGET_AVX2")])
(define_mode_iterator V48_AVX2
[V4SF V2DF
@@ -331,32 +406,41 @@
(V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
+(define_mode_attr avx512
+ [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
+ (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
+ (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
+ (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
+ (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
+ (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
+
(define_mode_attr sse2_avx_avx512f
[(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
+ (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
(V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
- (V8DI "avx512f")
+ (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
(V16SF "avx512f") (V8SF "avx") (V4SF "avx")
(V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
(define_mode_attr sse2_avx2
- [(V16QI "sse2") (V32QI "avx2")
- (V8HI "sse2") (V16HI "avx2")
+ [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
+ (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
(V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
(V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
- (V1TI "sse2") (V2TI "avx2")])
+ (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
(define_mode_attr ssse3_avx2
- [(V16QI "ssse3") (V32QI "avx2")
- (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
+ [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
+ (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
(V4SI "ssse3") (V8SI "avx2")
(V2DI "ssse3") (V4DI "avx2")
- (TI "ssse3") (V2TI "avx2")])
+ (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
(define_mode_attr sse4_1_avx2
- [(V16QI "sse4_1") (V32QI "avx2")
- (V8HI "sse4_1") (V16HI "avx2")
+ [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
+ (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
(V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
- (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
+ (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
(define_mode_attr avx_avx2
[(V4SF "avx") (V2DF "avx")
@@ -370,11 +454,12 @@
(V4SI "vec") (V8SI "avx2")
(V2DI "vec") (V4DI "avx2")])
-(define_mode_attr avx2_avx512f
+(define_mode_attr avx2_avx512bw
[(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
(V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
- (V8SF "avx2") (V16SF "avx512f")
- (V4DF "avx2") (V8DF "avx512f")])
+ (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
+ (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
+ (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
(define_mode_attr shuffletype
[(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
@@ -386,13 +471,19 @@
(define_mode_attr ssequartermode
[(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
+(define_mode_attr ssedoublemodelower
+ [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
+ (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
+ (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
+
(define_mode_attr ssedoublemode
[(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
- (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
- (V32QI "V32HI") (V16QI "V16HI")])
+ (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
+ (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
+ (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
(define_mode_attr ssebytemode
- [(V4DI "V32QI") (V2DI "V16QI")])
+ [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
;; All 128bit vector integer modes
(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
@@ -413,11 +504,15 @@
(define_mode_iterator VI48_128 [V4SI V2DI])
;; Various 256bit and 512 vector integer mode combinations
-(define_mode_iterator VI124_256_48_512
- [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
+(define_mode_iterator VI124_256_1248_512
+ [V32QI V16HI V8SI (V8DI "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V64QI "TARGET_AVX512BW")
+ (V32HI "TARGET_AVX512BW")])
(define_mode_iterator VI48_256 [V8SI V4DI])
(define_mode_iterator VI48_512 [V16SI V8DI])
(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
+(define_mode_iterator VI512_48F_12BW
+ [V16SI V8DI (V64QI "TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")])
;; Int-float size matches
(define_mode_iterator VI4F_128 [V4SI V4SF])
@@ -426,11 +521,34 @@
(define_mode_iterator VI8F_256 [V4DI V4DF])
(define_mode_iterator VI8F_256_512
[V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
-(define_mode_iterator VI48F_256_512
+(define_mode_iterator VI48F_256_512_2I
[V8SI V8SF
+ (V32HI "TARGET_AVX512BW")
+ (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V4DF "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
+(define_mode_iterator VI248F
+ [(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
+ (V4SF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+ (V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ (V32HI "TARGET_AVX512BW")
+ (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
(V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
-(define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
+(define_mode_iterator VI48F_I12B_512
+ [V16SI V16SF V8DI V8DF
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
+(define_mode_iterator VI48F
+ [V16SI V16SF V8DI V8DF
+ (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
+ (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+ (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
;; Mapping from float mode to required SSE level
(define_mode_attr sse
@@ -461,7 +579,7 @@
;; SSE instruction mode
(define_mode_attr sseinsnmode
- [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
+ [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
(V16SF "V16SF") (V8DF "V8DF")
@@ -471,8 +589,8 @@
;; Mapping of vector modes to corresponding mask size
(define_mode_attr avx512fmaskmode
- [(V16QI "HI")
- (V16HI "HI") (V8HI "QI")
+ [(V64QI "DI") (V32QI "SI") (V16QI "HI")
+ (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
(V16SI "HI") (V8SI "QI") (V4SI "QI")
(V8DI "QI") (V4DI "QI") (V2DI "QI")
(V16SF "HI") (V8SF "QI") (V4SF "QI")
@@ -487,10 +605,15 @@
(V8SI "V8SI") (V4DI "V4DI")
(V4SI "V4SI") (V2DI "V2DI")
(V16HI "V16HI") (V8HI "V8HI")
+ (V32HI "V32HI") (V64QI "V64QI")
(V32QI "V32QI") (V16QI "V16QI")])
+(define_mode_attr sseintvecmode2
+ [(V8DF "XI") (V4DF "OI") (V2DF "TI")
+ (V8SF "OI") (V4SF "TI")])
+
(define_mode_attr sseintvecmodelower
- [(V16SF "v16si")
+ [(V16SF "v16si") (V8DF "v8di")
(V8SF "v8si") (V4DF "v4di")
(V4SF "v4si") (V2DF "v2di")
(V8SI "v8si") (V4DI "v4di")
@@ -522,10 +645,13 @@
(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
(V8SI "V8SF") (V4SI "V4SF")
(V4DI "V8SF") (V2DI "V4SF")
- (V2TI "V8SF") (V1TI "V4SF")
+ (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
(V8SF "V8SF") (V4SF "V4SF")
(V4DF "V8SF") (V2DF "V4SF")])
+(define_mode_attr ssePSmode2
+ [(V8DI "V8SF") (V4DI "V4SF")])
+
;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode
[(V64QI "QI") (V32QI "QI") (V16QI "QI")
@@ -570,6 +696,7 @@
(define_mode_attr ssescalarsize
[(V8DI "64") (V4DI "64") (V2DI "64")
+ (V64QI "8") (V32QI "8") (V16QI "8")
(V32HI "16") (V16HI "16") (V8HI "16")
(V16SI "32") (V8SI "32") (V4SI "32")
(V16SF "32") (V8DF "64")])
@@ -581,7 +708,10 @@
(V8DI "p") (V8DF "")
(V4SI "p") (V4SF "")
(V8SI "p") (V8SF "")
- (V16SI "p") (V16SF "")])
+ (V16SI "p") (V16SF "")
+ (V16QI "p") (V8HI "p")
+ (V32QI "p") (V16HI "p")
+ (V64QI "p") (V32HI "p")])
;; SSE scalar suffix for vector modes
(define_mode_attr ssescalarmodesuffix
@@ -626,9 +756,18 @@
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
+;; Mapping for dbpsadbw modes
+(define_mode_attr dbpsadbwmode
+ [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
+
;; Mapping suffixes for broadcast
(define_mode_attr bcstscalarsuff
- [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
+ [(V64QI "b") (V32QI "b") (V16QI "b")
+ (V32HI "w") (V16HI "w") (V8HI "w")
+ (V16SI "d") (V8SI "d") (V4SI "d")
+ (V8DI "q") (V4DI "q") (V2DI "q")
+ (V16SF "ss") (V8SF "ss") (V4SF "ss")
+ (V8DF "sd") (V4DF "sd") (V2DF "sd")])
;; Include define_subst patterns for instructions with mask
(include "subst.md")
@@ -669,12 +808,10 @@
case 2:
/* There is no evex-encoded vmov* for sizes smaller than 64-bytes
in avx512f, so we need to use workarounds, to access sse registers
- 16-31, which are evex-only. */
- if (TARGET_AVX512F && <MODE_SIZE> < 64
- && ((REG_P (operands[0])
- && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
- || (REG_P (operands[1])
- && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
+	 16-31, which are evex-only.  With avx512vl no workarounds are needed. */
+ if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64 && !TARGET_AVX512VL
+ && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
+ || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
{
if (memory_operand (operands[0], <MODE>mode))
{
@@ -738,9 +875,11 @@
if (TARGET_AVX
&& (misaligned_operand (operands[0], <MODE>mode)
|| misaligned_operand (operands[1], <MODE>mode)))
- return "vmovdqu\t{%1, %0|%0, %1}";
+ return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
+ : "vmovdqu\t{%1, %0|%0, %1}";
else
- return "%vmovdqa\t{%1, %0|%0, %1}";
+ return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
+ : "%vmovdqa\t{%1, %0|%0, %1}";
case MODE_XI:
if (misaligned_operand (operands[0], <MODE>mode)
|| misaligned_operand (operands[1], <MODE>mode))
@@ -774,25 +913,37 @@
]
(const_string "<sseinsnmode>")))])
-(define_insn "avx512f_load<mode>_mask"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
- (vec_merge:VI48F_512
- (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
- (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
+(define_insn "<avx512>_load<mode>_mask"
+ [(set (match_operand:V_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:V_AVX512VL
+ (match_operand:V_AVX512VL 1 "nonimmediate_operand" "v,m")
+ (match_operand:V_AVX512VL 2 "vector_move_operand" "0C,0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
"TARGET_AVX512F"
{
switch (MODE_<sseinsnmode>)
{
case MODE_V8DF:
+ case MODE_V4DF:
+ case MODE_V2DF:
case MODE_V16SF:
+ case MODE_V8SF:
+ case MODE_V4SF:
if (misaligned_operand (operands[1], <MODE>mode))
return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
default:
- if (misaligned_operand (operands[1], <MODE>mode))
+      /* There is no vmovdqa8/16; use vmovdqu8/16 instead. */
+ if (<MODE>mode == V64QImode
+ || <MODE>mode == V32QImode
+ || <MODE>mode == V16QImode
+ || <MODE>mode == V32HImode
+ || <MODE>mode == V16HImode
+ || <MODE>mode == V8HImode
+ || misaligned_operand (operands[1], <MODE>mode))
return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
- return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
+ else
+ return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
}
}
[(set_attr "type" "ssemov")
@@ -800,11 +951,11 @@
(set_attr "memory" "none,load")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_blendm<mode>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (vec_merge:VI48F_512
- (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
- (match_operand:VI48F_512 1 "register_operand" "v")
+(define_insn "<avx512>_blendm<mode>"
+ [(set (match_operand:V_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:V_AVX512VL
+ (match_operand:V_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:V_AVX512VL 1 "register_operand" "v")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
"TARGET_AVX512F"
"v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
@@ -812,10 +963,10 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_store<mode>_mask"
- [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
- (vec_merge:VI48F_512
- (match_operand:VI48F_512 1 "register_operand" "v")
+(define_insn "<avx512>_store<mode>_mask"
+ [(set (match_operand:V_AVX512VL 0 "memory_operand" "=m")
+ (vec_merge:V_AVX512VL
+ (match_operand:V_AVX512VL 1 "register_operand" "v")
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
"TARGET_AVX512F"
@@ -823,10 +974,23 @@
switch (MODE_<sseinsnmode>)
{
case MODE_V8DF:
+ case MODE_V4DF:
+ case MODE_V2DF:
case MODE_V16SF:
+ case MODE_V8SF:
+ case MODE_V4SF:
return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
default:
- return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+      /* There is no vmovdqa8/16; use vmovdqu8/16 instead. */
+ if (<MODE>mode == V64QImode
+ || <MODE>mode == V32QImode
+ || <MODE>mode == V16QImode
+ || <MODE>mode == V32HImode
+ || <MODE>mode == V16HImode
+ || <MODE>mode == V8HImode)
+ return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ else
+ return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
}
}
[(set_attr "type" "ssemov")
@@ -1009,11 +1173,11 @@
]
(const_string "<MODE>")))])
-(define_insn "avx512f_storeu<ssemodesuffix>512_mask"
- [(set (match_operand:VF_512 0 "memory_operand" "=m")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "v")]
+(define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
+ [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
UNSPEC_STOREU)
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
@@ -1022,6 +1186,8 @@
switch (get_attr_mode (insn))
{
case MODE_V16SF:
+ case MODE_V8SF:
+ case MODE_V4SF:
return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
default:
return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
@@ -1067,16 +1233,20 @@
{
switch (get_attr_mode (insn))
{
+ case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
return "%vmovups\t{%1, %0|%0, %1}";
- case MODE_XI:
- if (<MODE>mode == V8DImode)
- return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
- else
- return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
default:
- return "%vmovdqu\t{%1, %0|%0, %1}";
+ switch (<MODE>mode)
+ {
+ case V32QImode:
+ case V16QImode:
+ if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+	    return "%vmovdqu\t{%1, %0|%0, %1}";
+	  /* FALLTHRU */
+	default:
+ return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+ }
}
}
[(set_attr "type" "ssemov")
@@ -1112,13 +1282,16 @@
case MODE_V8SF:
case MODE_V4SF:
return "%vmovups\t{%1, %0|%0, %1}";
- case MODE_XI:
- if (<MODE>mode == V8DImode)
- return "vmovdqu64\t{%1, %0|%0, %1}";
- else
- return "vmovdqu32\t{%1, %0|%0, %1}";
default:
- return "%vmovdqu\t{%1, %0|%0, %1}";
+ switch (<MODE>mode)
+ {
+ case V32QImode:
+ case V16QImode:
+ if (!(TARGET_AVX512VL && TARGET_AVX512BW))
+	    return "%vmovdqu\t{%1, %0|%0, %1}";
+	  /* FALLTHRU */
+	default:
+ return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
+ }
}
}
[(set_attr "type" "ssemov")
@@ -1142,21 +1315,16 @@
]
(const_string "<sseinsnmode>")))])
-(define_insn "avx512f_storedqu<mode>_mask"
- [(set (match_operand:VI48_512 0 "memory_operand" "=m")
- (vec_merge:VI48_512
- (unspec:VI48_512
- [(match_operand:VI48_512 1 "register_operand" "v")]
+(define_insn "<avx512>_storedqu<mode>_mask"
+ [(set (match_operand:VI_AVX512VL 0 "memory_operand" "=m")
+ (vec_merge:VI_AVX512VL
+ (unspec:VI_AVX512VL
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")]
UNSPEC_STOREU)
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
"TARGET_AVX512F"
-{
- if (<MODE>mode == V8DImode)
- return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
- else
- return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
-}
+ "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "memory" "store")
@@ -1443,9 +1611,9 @@
(set_attr "mode" "SF")])
(define_insn "<mask_codefor>rcp14<mode><mask_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
UNSPEC_RCP14))]
"TARGET_AVX512F"
"vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -1538,9 +1706,9 @@
(set_attr "mode" "<MODE>")])
(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
UNSPEC_RSQRT14))]
"TARGET_AVX512F"
"vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -2025,7 +2193,8 @@
[(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
- (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
+ (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
+ (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
(V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
(V8DF "TARGET_AVX512F")])
@@ -2040,9 +2209,9 @@
})
(define_expand "reduc_<code>_<mode>"
- [(umaxmin:VI48_512
- (match_operand:VI48_512 0 "register_operand")
- (match_operand:VI48_512 1 "register_operand"))]
+ [(umaxmin:VI512_48F_12BW
+ (match_operand:VI512_48F_12BW 0 "register_operand")
+ (match_operand:VI512_48F_12BW 1 "register_operand"))]
"TARGET_AVX512F"
{
ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
@@ -2069,6 +2238,34 @@
DONE;
})
+(define_insn "<mask_codefor>reducep<mode><mask_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
+ (match_operand:SI 2 "const_0_to_255_operand")]
+ UNSPEC_REDUCE))]
+ "TARGET_AVX512DQ"
+ "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "reduces<mode>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_REDUCE)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512DQ"
+ "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point comparisons
@@ -2156,14 +2353,21 @@
(set_attr "mode" "<ssescalarmode>")])
(define_mode_attr cmp_imm_predicate
- [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
- (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
-
-(define_insn "avx512f_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
+ [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
+ (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
+ (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
+ (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
+ (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
+ (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
+ (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
+ (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
+ (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
+
+(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48F_512 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ [(match_operand:V_AVX512VL 1 "register_operand" "v")
+ (match_operand:V_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP))]
"TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
@@ -2173,11 +2377,11 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_ucmp<mode>3<mask_scalar_merge_name>"
+(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")
(match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_UNSIGNED_PCMP))]
"TARGET_AVX512F"
@@ -2330,20 +2534,24 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "<sse>_andnot<mode>3"
+(define_insn "<sse>_andnot<mode>3<mask_name>"
[(set (match_operand:VF 0 "register_operand" "=x,v")
(and:VF
(not:VF
(match_operand:VF 1 "register_operand" "0,v"))
(match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE"
+ "TARGET_SSE
+ && (!<mask_applied>
+ || (TARGET_AVX512DQ && GET_MODE_SIZE (<MODE>mode) == 64)
+ || (TARGET_AVX512DQ && TARGET_AVX512VL))"
{
- static char buf[32];
+ static char buf[64];
const char *ops;
const char *suffix;
switch (get_attr_mode (insn))
{
+ case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
suffix = "ps";
@@ -2358,14 +2566,14 @@
ops = "andn%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
- ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
}
-  /* There is no vandnp[sd]. Use vpandnq. */
+  /* There is no vandnp[sd] in avx512f. Use vpandnq. */
- if (<MODE_SIZE> == 64)
+ if (GET_MODE_SIZE (<MODE>mode) == 64 && !TARGET_AVX512DQ)
{
suffix = "q";
ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
@@ -2388,35 +2596,31 @@
]
(const_string "<MODE>")))])
-(define_expand "<code><mode>3"
- [(set (match_operand:VF_128_256 0 "register_operand")
- (any_logic:VF_128_256
- (match_operand:VF_128_256 1 "nonimmediate_operand")
- (match_operand:VF_128_256 2 "nonimmediate_operand")))]
- "TARGET_SSE"
- "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-
-(define_expand "<code><mode>3"
- [(set (match_operand:VF_512 0 "register_operand")
- (fpint_logic:VF_512
- (match_operand:VF_512 1 "nonimmediate_operand")
- (match_operand:VF_512 2 "nonimmediate_operand")))]
- "TARGET_AVX512F"
+(define_expand "<code><mode>3<mask_name>"
+ [(set (match_operand:VF 0 "register_operand")
+ (any_logic:VF
+ (match_operand:VF 1 "nonimmediate_operand")
+ (match_operand:VF 2 "nonimmediate_operand")))]
+ "TARGET_SSE
+ && (!<mask_applied> || TARGET_AVX512VL || GET_MODE_SIZE (<MODE>mode) == 64)"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*<code><mode>3"
+(define_insn "*<code><mode>3<mask_name>"
[(set (match_operand:VF 0 "register_operand" "=x,v")
(any_logic:VF
(match_operand:VF 1 "nonimmediate_operand" "%0,v")
(match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "TARGET_SSE
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (!<mask_applied> || TARGET_AVX512VL || GET_MODE_SIZE (<MODE>mode) == 64)"
{
- static char buf[32];
+ static char buf[64];
const char *ops;
const char *suffix;
switch (get_attr_mode (insn))
{
+ case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
suffix = "ps";
@@ -2431,14 +2635,14 @@
ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
- ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
}
- /* There is no v<logic>p[sd]. Use vp<logic>q. */
- if (<MODE_SIZE> == 64)
+ /* There is no v<logic>p[sd] in avx512f. Use vp<logic>q. */
+ if (GET_MODE_SIZE (<MODE>mode) == 64 && !TARGET_AVX512DQ)
{
suffix = "q";
ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
@@ -2670,23 +2874,6 @@
]
(const_string "TI")))])
-;; There are no floating point xor for V16SF and V8DF in avx512f
-;; but we need them for negation. Instead we use int versions of
-;; xor. Maybe there could be a better way to do that.
-
-(define_mode_attr avx512flogicsuff
- [(V16SF "d") (V8DF "q")])
-
-(define_insn "avx512f_<logic><mode>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (fpint_logic:VF_512
- (match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F"
- "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")])
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FMA floating point multiply/accumulate instructions. These include
@@ -2705,10 +2892,10 @@
(define_mode_iterator FMAMODEM
[(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
(DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
- (V4SF "TARGET_FMA || TARGET_FMA4")
- (V2DF "TARGET_FMA || TARGET_FMA4")
- (V8SF "TARGET_FMA || TARGET_FMA4")
- (V4DF "TARGET_FMA || TARGET_FMA4")
+ (V4SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V2DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V8SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V4DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
(V16SF "TARGET_AVX512F")
(V8DF "TARGET_AVX512F")])
@@ -2742,14 +2929,14 @@
;; The builtins for intrinsics are not constrained by SSE math enabled.
(define_mode_iterator FMAMODE
- [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
- (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
- (V4SF "TARGET_FMA || TARGET_FMA4")
- (V2DF "TARGET_FMA || TARGET_FMA4")
- (V8SF "TARGET_FMA || TARGET_FMA4")
- (V4DF "TARGET_FMA || TARGET_FMA4")
- (V16SF "TARGET_AVX512F")
- (V8DF "TARGET_AVX512F")])
+ [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+ (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+ (V4SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V2DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V8SF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V4DF "TARGET_FMA || TARGET_FMA4 || (TARGET_AVX512F && TARGET_AVX512VL)")
+ (V16SF "TARGET_AVX512F")
+ (V8DF "TARGET_AVX512F")])
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
@@ -2758,13 +2945,13 @@
(match_operand:FMAMODE 2 "nonimmediate_operand")
(match_operand:FMAMODE 3 "nonimmediate_operand")))])
-(define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>"
- [(match_operand:VF_512 0 "register_operand")
- (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
- (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
- (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
+(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
+ [(match_operand:VF_AVX512VL 0 "register_operand")
+ (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
{
emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
operands[0], operands[1], operands[2], operands[3],
@@ -2780,7 +2967,7 @@
(V8SF "TARGET_FMA || TARGET_FMA4")
(V4DF "TARGET_FMA || TARGET_FMA4")])
-(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fmadd_noavx512_<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE_NOVF512
(match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
@@ -2798,11 +2985,11 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (fma:VF_512
- (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -2812,16 +2999,16 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmadd_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (fma:VF_512
- (match_operand:VF_512 1 "register_operand" "0,0")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
+(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
"@
vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
@@ -2829,13 +3016,13 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmadd_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=x")
- (vec_merge:VF_512
- (fma:VF_512
- (match_operand:VF_512 1 "register_operand" "x")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (match_operand:VF_512 3 "register_operand" "0"))
+(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=x")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "x")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "register_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512F"
@@ -2844,7 +3031,7 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fmsub_noavx512<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE_NOVF512
(match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
@@ -2863,12 +3050,12 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (fma:VF_512
- (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -2878,14 +3065,14 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmsub_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (fma:VF_512
- (match_operand:VF_512 1 "register_operand" "0,0")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
+(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX512F"
@@ -2896,23 +3083,23 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmsub_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (fma:VF_512
- (match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "register_operand" "0")))
+(define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "register_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
"vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
[(set_attr "isa" "fma_avx512f")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fnmadd_noavx512_<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE_NOVF512
(neg:FMAMODE_NOVF512
@@ -2931,12 +3118,12 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -2946,17 +3133,17 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fnmadd_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "register_operand" "0,0"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
+(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
"@
vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
@@ -2964,23 +3151,23 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fnmadd_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "register_operand" "v"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (match_operand:VF_512 3 "register_operand" "0"))
+(define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "v"))
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "register_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
"vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
[(set_attr "isa" "fma_avx512f")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fnmsub_noavx512_<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE_NOVF512
(neg:FMAMODE_NOVF512
@@ -3000,13 +3187,13 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
"<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
@@ -3016,18 +3203,18 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fnmsub_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "register_operand" "0,0"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
+(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && <round_mode512bit_condition>"
"@
vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
@@ -3035,15 +3222,15 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fnmsub_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (fma:VF_512
- (neg:VF_512
- (match_operand:VF_512 1 "register_operand" "v"))
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "register_operand" "0")))
+(define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (fma:VF_AVX512VL
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 1 "register_operand" "v"))
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "register_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512F"
@@ -3072,11 +3259,11 @@
UNSPEC_FMADDSUB))]
"TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
-(define_expand "avx512f_fmaddsub_<mode>_maskz<round_expand_name>"
- [(match_operand:VF_512 0 "register_operand")
- (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
- (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
- (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
+(define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
+ [(match_operand:VF_AVX512VL 0 "register_operand")
+ (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512F"
{
@@ -3086,7 +3273,7 @@
DONE;
})
-(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fmaddsub_noavx512_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
(unspec:VF_128_256
[(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
@@ -3105,11 +3292,11 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
UNSPEC_FMADDSUB))]
"TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@@ -3120,13 +3307,13 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "0,0")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")]
+(define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
UNSPEC_FMADDSUB)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
@@ -3138,13 +3325,13 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (match_operand:VF_512 3 "register_operand" "0")]
+(define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_AVX512VL 3 "register_operand" "0")]
UNSPEC_FMADDSUB)
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -3154,7 +3341,7 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
+(define_insn "<sd_mask_codefor>fma_fmsubadd_noavx512_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
(unspec:VF_128_256
[(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
@@ -3174,12 +3361,12 @@
(set_attr "mode" "<MODE>")])
(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v,v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
+ (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
UNSPEC_FMADDSUB))]
"TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
@@ -3190,14 +3377,14 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v,v")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "0,0")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
- (neg:VF_512
- (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
+(define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
UNSPEC_FMADDSUB)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
@@ -3209,14 +3396,14 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
- (neg:VF_512
- (match_operand:VF_512 3 "register_operand" "0"))]
+(define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_AVX512VL
+ (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
UNSPEC_FMADDSUB)
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -3599,15 +3786,15 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "ufloatv16siv16sf2<mask_name><round_name>"
- [(set (match_operand:V16SF 0 "register_operand" "=v")
- (unsigned_float:V16SF
- (match_operand:V16SI 1 "<round_nimm_predicate>" "<round_constraint>")))]
+(define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (unsigned_float:VF1_AVX512VL
+ (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
"TARGET_AVX512F"
"vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
- (set_attr "mode" "V16SF")])
+ (set_attr "mode" "<MODE>")])
(define_expand "floatuns<sseintvecmodelower><mode>2"
[(match_operand:VF1 0 "register_operand")
@@ -3627,13 +3814,13 @@
(define_mode_attr sf2simodelower
[(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
-(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
+(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
[(set (match_operand:VI4_AVX 0 "register_operand" "=v")
(unspec:VI4_AVX
[(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
UNSPEC_FIX_NOTRUNC))]
- "TARGET_SSE2"
- "%vcvtps2dq\t{%1, %0|%0, %1}"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
+ "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set (attr "prefix_data16")
(if_then_else
@@ -3654,16 +3841,62 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name><round_name>"
- [(set (match_operand:V16SI 0 "register_operand" "=v")
- (unspec:V16SI
- [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
+(define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
+ [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI4_AVX512VL
+ [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512F"
"vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
- (set_attr "mode" "XI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
+ [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
+ (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && <round_mode512bit_condition>"
+ "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI
+ [(vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
+ [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
+ (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && <round_mode512bit_condition>"
+ "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI
+ [(vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)]))]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
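+
+;; Note (informal): the v2di patterns above model the source as the
+;; low two elements of a V4SF, since vcvtps2qq/vcvtps2uqq with a
+;; 128-bit destination consume only the low 64 bits of the source.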
(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
@@ -3675,20 +3908,20 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "fix_truncv8sfv8si2"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
- (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX"
- "vcvttps2dq\t{%1, %0|%0, %1}"
+(define_insn "fix_truncv8sfv8si2<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
+ (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "OI")])
-(define_insn "fix_truncv4sfv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSE2"
- "%vcvttps2dq\t{%1, %0|%0, %1}"
+(define_insn "fix_truncv4sfv4si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
+ "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set (attr "prefix_rep")
(if_then_else
@@ -3701,7 +3934,7 @@
(const_string "*")
(const_string "0")))
(set_attr "prefix_data16" "0")
- (set_attr "prefix" "maybe_vex")
+ (set_attr "prefix" "<mask_prefix2>")
(set_attr "mode" "TI")])
(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
@@ -4009,21 +4242,94 @@
(define_insn "float<si2dfmodelower><mode>2<mask_name>"
[(set (match_operand:VF2_512_256 0 "register_operand" "=v")
(float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX && <mask_mode512bit_condition>"
+ "<mask_mode512bit_condition>"
"vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn "ufloatv8siv8df<mask_name>"
- [(set (match_operand:V8DF 0 "register_operand" "=v")
- (unsigned_float:V8DF
- (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F"
+(define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
+ [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
+ (any_float:VF2_AVX512VL
+ (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512DQ && <round_mode512bit_condition>"
+ "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+;; For <floatsuffix>float<sselongvecmodelower><mode> insn patterns
+(define_mode_attr qq2pssuff
+ [(V8SF "") (V4SF "{y}")])
+
+(define_mode_attr sselongvecmode
+ [(V8SF "V8DI") (V4SF "V4DI")])
+
+(define_mode_attr sselongvecmodelower
+ [(V8SF "v8di") (V4SF "v4di")])
+
+(define_mode_attr sseintvecmode3
+ [(V8SF "XI") (V4SF "OI")
+ (V8DF "OI") (V4DF "TI")])
+
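+;; Informal note on the {x}/{y} suffixes used below: with an xmm
+;; destination, the memory-source form of vcvt[u]qq2ps is ambiguous
+;; (the source may be V2DI or V4DI), so AT&T syntax adds a size
+;; suffix: "vcvtqq2psy (%rax), %xmm0" reads 256 bits, while
+;; "vcvtqq2psx (%rax), %xmm0" reads 128 bits.  Hence qq2pssuff maps
+;; V4SF to "{y}" and the V2DI patterns below hard-code "{x}".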
+(define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
+ [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
+ (any_float:VF1_128_256VL
+ (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
+ "TARGET_AVX512DQ && <round_modev8sf_condition>"
+ "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<floatsuffix>floatv2div2sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_concat:V4SF
+ (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
+ (const_vector:V2SF [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "<floatsuffix>floatv2div2sf2_mask"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_concat:V4SF
+ (vec_merge:V2SF
+ (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V2SF [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
+ [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
+ (unsigned_float:VF2_512_256VL
+ (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512F"
+ "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "ufloatv2siv2df2<mask_name>"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (unsigned_float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_AVX512VL"
"vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
- (set_attr "mode" "V8DF")])
+ (set_attr "mode" "V2DF")])
(define_insn "avx512f_cvtdq2pd512_2"
[(set (match_operand:V8DF 0 "register_operand" "=v")
@@ -4034,33 +4340,33 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX"
+ "TARGET_AVX512F"
"vcvtdq2pd\t{%t1, %0|%0, %t1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
(define_insn "avx_cvtdq2pd256_2"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
+ [(set (match_operand:V4DF 0 "register_operand" "=v")
(float:V4DF
(vec_select:V4SI
- (match_operand:V8SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
"TARGET_AVX"
"vcvtdq2pd\t{%x1, %0|%0, %x1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4DF")])
-(define_insn "sse2_cvtdq2pd"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
+(define_insn "sse2_cvtdq2pd<mask_name>"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
(float:V2DF
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
- "TARGET_SSE2"
- "%vcvtdq2pd\t{%1, %0|%0, %q1}"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
+ "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
(set_attr "ssememalign" "64")
@@ -4077,14 +4383,14 @@
(set_attr "prefix" "evex")
(set_attr "mode" "OI")])
-(define_insn "avx_cvtpd2dq256"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
+(define_insn "avx_cvtpd2dq256<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
UNSPEC_FIX_NOTRUNC))]
- "TARGET_AVX"
- "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "OI")])
(define_expand "avx_cvtpd2dq256_2"
@@ -4109,25 +4415,16 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "OI")])
-(define_expand "sse2_cvtpd2dq"
- [(set (match_operand:V4SI 0 "register_operand")
- (vec_concat:V4SI
- (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
- UNSPEC_FIX_NOTRUNC)
- (match_dup 2)))]
- "TARGET_SSE2"
- "operands[2] = CONST0_RTX (V2SImode);")
-
-(define_insn "*sse2_cvtpd2dq"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
+(define_insn "sse2_cvtpd2dq<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_concat:V4SI
- (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
UNSPEC_FIX_NOTRUNC)
- (match_operand:V2SI 2 "const0_operand")))]
- "TARGET_SSE2"
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
if (TARGET_AVX)
- return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
+ return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
else
return "cvtpd2dq\t{%1, %0|%0, %1}";
}
@@ -4140,16 +4437,33 @@
(set_attr "athlon_decode" "vector")
(set_attr "bdver1_decode" "double")])
-(define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>"
- [(set (match_operand:V8SI 0 "register_operand" "=v")
- (unspec:V8SI
- [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
+;; For ufix_notrunc* insn patterns
+(define_mode_attr pd2udqsuff
+ [(V8DF "") (V4DF "{y}")])
+
+(define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
+ [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
+ (unspec:<si2dfmode>
+ [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512F"
- "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
- (set_attr "mode" "OI")])
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "ufix_notruncv2dfv2si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (unspec:V2SI
+ [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC)
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
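+
+;; Note (informal): the vec_concat with a zero V2SI models the fact
+;; that vcvtpd2udq{x} writes its two results to the low half of the
+;; xmm destination and zeroes the upper half.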
(define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
[(set (match_operand:V8SI 0 "register_operand" "=v")
@@ -4161,15 +4475,90 @@
(set_attr "prefix" "evex")
(set_attr "mode" "OI")])
-(define_insn "fix_truncv4dfv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX"
- "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
+(define_insn "ufix_truncv2dfv2si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "<fixsuffix>fix_truncv4dfv4si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (any_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
+ "(TARGET_AVX && !<ufix_bool>) || (TARGET_AVX512VL && TARGET_AVX512F)"
+ "vcvttpd2<fixsuffix>dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
+(define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
+ (any_fix:<sseintvecmode>
+ (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
+ "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
+ "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseintvecmode2>")])
+
+(define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
+ (unspec:<sseintvecmode>
+ [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && <round_mode512bit_condition>"
+ "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseintvecmode2>")])
+
+(define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
+ (unspec:<sseintvecmode>
+ [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+ "TARGET_AVX512DQ && <round_mode512bit_condition>"
+ "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseintvecmode2>")])
+
+(define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
+ [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
+ (any_fix:<sselongvecmode>
+ (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
+ "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
+ "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseintvecmode3>")])
+
+(define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (any_fix:V2DI
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+ "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
+ (unsigned_fix:<sseintvecmode>
+ (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512VL"
+ "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseintvecmode2>")])
+
(define_expand "avx_cvttpd2dq256_2"
[(set (match_operand:V8SI 0 "register_operand")
(vec_concat:V8SI
@@ -4178,35 +4567,15 @@
"TARGET_AVX"
"operands[2] = CONST0_RTX (V4SImode);")
-(define_insn "*avx_cvttpd2dq256_2"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
- (vec_concat:V8SI
- (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
- (match_operand:V4SI 2 "const0_operand")))]
- "TARGET_AVX"
- "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
- (set_attr "btver2_decode" "vector")
- (set_attr "mode" "OI")])
-
-(define_expand "sse2_cvttpd2dq"
- [(set (match_operand:V4SI 0 "register_operand")
+(define_insn "sse2_cvttpd2dq<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_concat:V4SI
- (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
- (match_dup 2)))]
- "TARGET_SSE2"
- "operands[2] = CONST0_RTX (V2SImode);")
-
-(define_insn "*sse2_cvttpd2dq"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
- (vec_concat:V4SI
- (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
- (match_operand:V2SI 2 "const0_operand")))]
- "TARGET_SSE2"
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
if (TARGET_AVX)
- return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
+ return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
else
return "cvttpd2dq\t{%1, %0|%0, %1}";
}
@@ -4244,7 +4613,7 @@
(vec_merge:V2DF
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 2 "nonimmediate_operand" "x,m,<round_saeonly_constraint>")
+ (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
(parallel [(const_int 0) (const_int 1)])))
(match_operand:V2DF 1 "register_operand" "0,0,v")
(const_int 1)))]
@@ -4272,14 +4641,14 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V8SF")])
-(define_insn "avx_cvtpd2ps256"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "avx_cvtpd2ps256<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(float_truncate:V4SF
- (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX"
- "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
+ (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "btver2_decode" "vector")
(set_attr "mode" "V4SF")])
@@ -4292,16 +4661,28 @@
"TARGET_SSE2"
"operands[2] = CONST0_RTX (V2SFmode);")
-(define_insn "*sse2_cvtpd2ps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_expand "sse2_cvtpd2ps_mask"
+ [(set (match_operand:V4SF 0 "register_operand")
+ (vec_merge:V4SF
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand"))
+ (match_dup 4))
+ (match_operand:V4SF 2 "register_operand")
+ (match_operand:QI 3 "register_operand")))]
+ "TARGET_SSE2"
+ "operands[4] = CONST0_RTX (V2SFmode);")
+
+(define_insn "*sse2_cvtpd2ps<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_concat:V4SF
(float_truncate:V2SF
- (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
(match_operand:V2SF 2 "const0_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
if (TARGET_AVX)
- return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
+ return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
else
return "cvtpd2ps\t{%1, %0|%0, %1}";
}
@@ -4355,14 +4736,44 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
-(define_insn "sse2_cvtps2pd"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
+(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")]
+ UNSPEC_CVTINT2MASK))]
+ "((TARGET_AVX512BW
+ && (<ssescalarmode>mode == QImode
+ || <ssescalarmode>mode == HImode))
+ || (TARGET_AVX512DQ
+ && (<ssescalarmode>mode == SImode
+ || <ssescalarmode>mode == DImode)))"
+ "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_cvtmask2<ssemodesuffix><mode>"
+ [(set (match_operand:VI_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI_AVX512VL
+ [(match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")]
+ UNSPEC_CVTINT2MASK))]
+ "((TARGET_AVX512BW
+ && (<ssescalarmode>mode == QImode
+ || <ssescalarmode>mode == HImode))
+ || (TARGET_AVX512DQ
+ && (<ssescalarmode>mode == SImode
+ || <ssescalarmode>mode == DImode)))"
+ "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
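+
+;; Semantics reminder for the two patterns above (informal):
+;; vpmov<s>2m copies the most significant bit of each element into
+;; the corresponding mask-register bit, while vpmovm2<s> expands each
+;; mask bit back into an all-ones or all-zeros element, so the pair
+;; round-trips vector comparison results through mask registers.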
+
+(define_insn "sse2_cvtps2pd<mask_name>"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
- "TARGET_SSE2"
- "%vcvtps2pd\t{%1, %0|%0, %q1}"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
+ "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssecvt")
(set_attr "amdfam10_decode" "direct")
(set_attr "athlon_decode" "double")
@@ -5048,18 +5459,18 @@
(set_attr "mode" "V16SF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
-(define_insn "avx_unpckhps256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+(define_insn "avx_unpckhps256<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SF 1 "register_operand" "v")
+ (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 2) (const_int 10)
(const_int 3) (const_int 11)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
- "TARGET_AVX"
- "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
@@ -5098,18 +5509,18 @@
operands[4] = gen_reg_rtx (V8SFmode);
})
-(define_insn "vec_interleave_highv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv4sf<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,v")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0,x")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V4SF 1 "register_operand" "0,v")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
- "TARGET_SSE"
+ "TARGET_SSE && <mask_mode512bit_condition>"
"@
unpckhps\t{%2, %0|%0, %2}
- vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,vex")
@@ -5136,22 +5547,39 @@
(set_attr "mode" "V16SF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
-(define_insn "avx_unpcklps256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+(define_insn "avx_unpcklps256<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SF 1 "register_operand" "v")
+ (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 4) (const_int 12)
(const_int 5) (const_int 13)])))]
- "TARGET_AVX"
- "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
+(define_insn "unpcklps128_mask"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_merge:V4SF
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)]))
+ (match_operand:V4SF 3 "vector_move_operand" "0C")
+ (match_operand:QI 4 "register_operand" "Yk")))]
+ "TARGET_AVX512F && TARGET_AVX512VL"
+ "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V4SF")])
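+
+;; Informal note on the output-template modifiers above: %{%4%}
+;; prints the "{%k4}" write-mask annotation, and %N3 appends "{z}"
+;; when operand 3 is the zero vector, selecting zero-masking rather
+;; than merge-masking.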
+
(define_expand "vec_interleave_lowv8sf"
[(set (match_dup 3)
(vec_select:V8SF
@@ -5205,34 +5633,34 @@
;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions
-(define_insn "avx_movshdup256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+(define_insn "avx_movshdup256<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SF 1 "nonimmediate_operand" "vm")
(match_dup 1))
(parallel [(const_int 1) (const_int 1)
(const_int 3) (const_int 3)
(const_int 5) (const_int 5)
(const_int 7) (const_int 7)])))]
- "TARGET_AVX"
- "vmovshdup\t{%1, %0|%0, %1}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "sse3_movshdup"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "sse3_movshdup<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
(match_dup 1))
(parallel [(const_int 1)
(const_int 1)
(const_int 7)
(const_int 7)])))]
- "TARGET_SSE3"
- "%vmovshdup\t{%1, %0|%0, %1}"
+ "TARGET_SSE3 && <mask_mode512bit_condition>"
+ "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix_rep" "1")
(set_attr "prefix" "maybe_vex")
@@ -5258,34 +5686,34 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
-(define_insn "avx_movsldup256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+(define_insn "avx_movsldup256<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SF 1 "nonimmediate_operand" "vm")
(match_dup 1))
(parallel [(const_int 0) (const_int 0)
(const_int 2) (const_int 2)
(const_int 4) (const_int 4)
(const_int 6) (const_int 6)])))]
- "TARGET_AVX"
- "vmovsldup\t{%1, %0|%0, %1}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "sse3_movsldup"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "sse3_movsldup<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm")
(match_dup 1))
(parallel [(const_int 0)
(const_int 0)
(const_int 6)
(const_int 6)])))]
- "TARGET_SSE3"
- "%vmovsldup\t{%1, %0|%0, %1}"
+ "TARGET_SSE3 && <mask_mode512bit_condition>"
+ "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix_rep" "1")
(set_attr "prefix" "maybe_vex")
@@ -5311,7 +5739,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
-(define_expand "avx_shufps256"
+(define_expand "avx_shufps256<mask_expand4_name>"
[(match_operand:V8SF 0 "register_operand")
(match_operand:V8SF 1 "register_operand")
(match_operand:V8SF 2 "nonimmediate_operand")
@@ -5319,25 +5747,28 @@
"TARGET_AVX"
{
int mask = INTVAL (operands[3]);
- emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT (((mask >> 4) & 3) + 8),
- GEN_INT (((mask >> 6) & 3) + 8),
- GEN_INT (((mask >> 0) & 3) + 4),
- GEN_INT (((mask >> 2) & 3) + 4),
- GEN_INT (((mask >> 4) & 3) + 12),
- GEN_INT (((mask >> 6) & 3) + 12)));
+ emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
+ operands[1],
+ operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 8),
+ GEN_INT (((mask >> 6) & 3) + 8),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 12),
+ GEN_INT (((mask >> 6) & 3) + 12)
+ <mask_expand4_args>));
DONE;
})
;; One bit in mask selects 2 elements.
-(define_insn "avx_shufps256_1"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+(define_insn "avx_shufps256_1<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SF 1 "register_operand" "v")
+ (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
(parallel [(match_operand 3 "const_0_to_3_operand" )
(match_operand 4 "const_0_to_3_operand" )
(match_operand 5 "const_8_to_11_operand" )
@@ -5347,6 +5778,7 @@
(match_operand 9 "const_12_to_15_operand")
(match_operand 10 "const_12_to_15_operand")])))]
"TARGET_AVX
+ && <mask_mode512bit_condition>
&& (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
&& INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
&& INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
@@ -5359,14 +5791,14 @@
mask |= (INTVAL (operands[6]) - 8) << 6;
operands[3] = GEN_INT (mask);
- return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
[(set_attr "type" "sseshuf")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "V8SF")])
-(define_expand "sse_shufps"
+(define_expand "sse_shufps<mask_expand4_name>"
[(match_operand:V4SF 0 "register_operand")
(match_operand:V4SF 1 "register_operand")
(match_operand:V4SF 2 "nonimmediate_operand")
@@ -5374,14 +5806,46 @@
"TARGET_SSE"
{
int mask = INTVAL (operands[3]);
- emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT (((mask >> 4) & 3) + 4),
- GEN_INT (((mask >> 6) & 3) + 4)));
+ emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
+ operands[1],
+ operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)
+ <mask_expand4_args>));
DONE;
})
+(define_insn "sse_shufps_v4sf_mask"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_merge:V4SF
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")
+ (match_operand 5 "const_4_to_7_operand")
+ (match_operand 6 "const_4_to_7_operand")]))
+ (match_operand:V4SF 7 "vector_move_operand" "0C")
+ (match_operand:QI 8 "register_operand" "Yk")))]
+ "TARGET_AVX512F && TARGET_AVX512VL"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 4) << 4;
+ mask |= (INTVAL (operands[6]) - 4) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
+}
+ [(set_attr "type" "sseshuf")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "sse_shufps_<mode>"
[(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
(vec_select:VI4F_128
@@ -5652,13 +6116,13 @@
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "vec_set<mode>_0"
[(set (match_operand:VI4F_128 0 "nonimmediate_operand"
- "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
+ "=v,v,x ,x,x,v,x ,x ,m ,m ,m")
(vec_merge:VI4F_128
(vec_duplicate:VI4F_128
(match_operand:<ssescalarmode> 2 "general_operand"
- " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
+ " v,m,*r,m,x,v,*rm,*rm,!x,!*re,!*fF"))
(match_operand:VI4F_128 1 "vector_move_operand"
- " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
+ " C,C,C ,C,0,v,0 ,x ,0 ,0 ,0")
(const_int 1)))]
"TARGET_SSE"
"@
@@ -5843,44 +6307,62 @@
operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
})
-(define_expand "avx512f_vextract<shuffletype>32x4_mask"
+(define_mode_attr extract_type
+ [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
+
+(define_mode_attr extract_suf
+ [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
+
+(define_mode_iterator AVX512_VEC
+ [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
+
+(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
[(match_operand:<ssequartermode> 0 "nonimmediate_operand")
- (match_operand:V16FI 1 "register_operand")
+ (match_operand:AVX512_VEC 1 "register_operand")
(match_operand:SI 2 "const_0_to_3_operand")
(match_operand:<ssequartermode> 3 "nonimmediate_operand")
(match_operand:QI 4 "register_operand")]
"TARGET_AVX512F"
{
+  int mask = INTVAL (operands[2]);
+
if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
operands[0] = force_reg (<ssequartermode>mode, operands[0]);
- switch (INTVAL (operands[2]))
- {
- case 0:
- emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
- operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
- GEN_INT (3), operands[3], operands[4]));
- break;
- case 1:
- emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
- operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
- GEN_INT (7), operands[3], operands[4]));
- break;
- case 2:
- emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
- operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
- GEN_INT (11), operands[3], operands[4]));
- break;
- case 3:
- emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
- operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
- GEN_INT (15), operands[3], operands[4]));
- break;
- default:
- gcc_unreachable ();
- }
+
+ if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
+ emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+ operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
+ GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
+ operands[4]));
+ else
+ emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
+ operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
+ operands[4]));
DONE;
})
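+
+;; Illustrative example of the index arithmetic above: extracting
+;; quarter 2 of a V16SF passes selectors 8, 9, 10, 11 (mask * 4
+;; .. mask * 4 + 3) to the 32x4 pattern, while quarter 2 of a V8DI
+;; passes 4 and 5 (mask * 2 and mask * 2 + 1) to the 64x2 pattern.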
+(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
+ [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
+ (vec_merge:<ssequartermode>
+ (vec_select:<ssequartermode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (parallel [(match_operand 2 "const_0_to_7_operand")
+ (match_operand 3 "const_0_to_7_operand")]))
+ (match_operand:<ssequartermode> 4 "memory_operand" "0")
+ (match_operand:QI 5 "register_operand" "k")))]
+  "TARGET_AVX512DQ
+   && INTVAL (operands[2]) % 2 == 0
+   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
+{
+ operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
+ return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
[(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
(vec_merge:<ssequartermode>
@@ -5906,6 +6388,27 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
+ [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
+ (vec_select:<ssequartermode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (parallel [(match_operand 2 "const_0_to_7_operand")
+ (match_operand 3 "const_0_to_7_operand")])))]
+  "TARGET_AVX512DQ
+   && INTVAL (operands[2]) % 2 == 0
+   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
+{
+ operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
+ return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
[(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
(vec_select:<ssequartermode>
@@ -5931,9 +6434,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_vextract<shuffletype>64x4_mask"
+(define_mode_attr extract_type_2
+ [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
+
+(define_mode_attr extract_suf_2
+ [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
+
+(define_mode_iterator AVX512_VEC_2
+ [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
+
+(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
[(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
- (match_operand:V8FI 1 "register_operand")
+ (match_operand:AVX512_VEC_2 1 "register_operand")
(match_operand:SI 2 "const_0_to_1_operand")
(match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
(match_operand:QI 4 "register_operand")]
@@ -5989,7 +6501,7 @@
(match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
(match_operand:QI 3 "register_operand" "Yk")))]
"TARGET_AVX512F"
-"vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
+ "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
@@ -6055,6 +6567,81 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "vec_extract_hi_<mode>_maskm"
+ [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+ (vec_merge:<ssehalfvecmode>
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "register_operand" "v")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))
+ (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
+ (match_operand:QI 3 "register_operand" "k")))]
+ "TARGET_AVX512DQ"
+ "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "register_operand" "v,v")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX512F && (!<mask_applied> || TARGET_AVX512DQ)"
+ "@
+ vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
+ vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "isa" "avx512dq,noavx512dq")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512vl_vextractf128<mode>"
+ [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+ (match_operand:VI48F_256 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_1_operand")
+ (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512DQ && TARGET_AVX512VL"
+{
+ rtx (*insn)(rtx, rtx, rtx, rtx);
+
+ if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
+ operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
+
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ insn = gen_vec_extract_lo_<mode>_mask;
+ break;
+ case 1:
+ insn = gen_vec_extract_hi_<mode>_mask;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "avx_vextractf128<mode>"
[(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
(match_operand:V_256 1 "register_operand")
@@ -6079,7 +6666,7 @@
DONE;
})
-(define_insn_and_split "vec_extract_lo_<mode>"
+(define_insn "vec_extract_lo_<mode><mask_name>"
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
(vec_select:<ssehalfvecmode>
(match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
@@ -6087,11 +6674,28 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
- "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
- "&& reload_completed"
- [(const_int 0)]
+ "TARGET_AVX512F
+ && <mask_mode512bit_condition>
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
+ if (<mask_applied>)
+ return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+ else
+ return "#";
+})
+
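+;; When no mask is applied the insn above returns "#", and the split
+;; below rewrites the low-half extract as a plain move: a register
+;; source becomes its low subregister and a memory source a narrower
+;; memory reference, so no extract instruction is emitted at all.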
+(define_split
+ [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "nonimmediate_operand")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && reload_completed"
+ [(const_int 0)]
+ {
rtx op1 = operands[1];
if (REG_P (op1))
op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
@@ -6101,46 +6705,57 @@
DONE;
})
-(define_insn "vec_extract_hi_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
+(define_insn "vec_extract_lo_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
(vec_select:<ssehalfvecmode>
- (match_operand:V16FI 1 "nonimmediate_operand" "v,v")
- (parallel [(const_int 8) (const_int 9)
- (const_int 10) (const_int 11)
- (const_int 12) (const_int 13)
- (const_int 14) (const_int 15)])))]
- "TARGET_AVX512F"
- "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "length_immediate" "1")
- (set_attr "memory" "none,store")
- (set_attr "prefix" "evex")
- (set_attr "mode" "XI")])
+ (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_AVX
+ && (!<mask_applied> || (TARGET_AVX512VL && TARGET_AVX512DQ))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ if (<mask_applied>)
+    return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+ else
+ return "#";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
-(define_insn_and_split "vec_extract_lo_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
+(define_split
+ [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
(vec_select:<ssehalfvecmode>
- (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
+ (match_operand:VI8F_256 1 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 1)])))]
- "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (match_dup 1))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && reload_completed"
+ [(const_int 0)]
{
- if (REG_P (operands[1]))
- operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
else
- operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
+ op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
})
-(define_insn "vec_extract_hi_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
+(define_insn "vec_extract_hi_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
(vec_select:<ssehalfvecmode>
- (match_operand:VI8F_256 1 "register_operand" "x,x")
+ (match_operand:VI8F_256 1 "register_operand" "v,v")
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_AVX"
- "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
+{
+ if (TARGET_AVX512DQ && TARGET_AVX512VL)
+ return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
+ else
+ return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
@@ -6148,36 +6763,106 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "vec_extract_lo_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
+(define_split
+ [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
(vec_select:<ssehalfvecmode>
- (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
+ (match_operand:VI4F_256 1 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
- "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (match_dup 1))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
+ [(const_int 0)]
{
- if (REG_P (operands[1]))
- operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
else
- operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
+ op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
})
-(define_insn "vec_extract_hi_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
+
+(define_insn "vec_extract_lo_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
(vec_select:<ssehalfvecmode>
- (match_operand:VI4F_256 1 "register_operand" "x,x")
+ (match_operand:VI4F_256 1 "nonimmediate_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "(!<mask_applied> && TARGET_AVX) || (TARGET_AVX512VL && TARGET_AVX512DQ)"
+{
+ if (<mask_applied>)
+ return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+ else
+ return "#";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set (attr "memory")
+ (if_then_else (match_test "MEM_P (operands[0])")
+ (const_string "store")
+ (const_string "none")))
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_lo_<mode>_maskm"
+ [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+ (vec_merge:<ssehalfvecmode>
+ (vec_select:<ssehalfvecmode>
+ (match_operand:VI4F_256 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
+ (match_operand:QI 3 "register_operand" "k")))]
+ "TARGET_AVX512VL && TARGET_AVX512F"
+  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode>_maskm"
+ [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+ (vec_merge:<ssehalfvecmode>
+ (vec_select:<ssehalfvecmode>
+ (match_operand:VI4F_256 1 "register_operand" "v")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
+ (match_operand:<ssehalfvecmode> 3 "register_operand" "k")))]
+ "TARGET_AVX512F && TARGET_AVX512VL"
+{
+ return "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:VI4F_256 1 "register_operand" "v")
(parallel [(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
- "TARGET_AVX"
- "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
+ "TARGET_AVX && (!<mask_applied> || (TARGET_AVX512VL && TARGET_AVX512F))"
+{
+ if (TARGET_AVX512VL && TARGET_AVX512F)
+ return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
+ else
+ return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "memory" "none,store")
- (set_attr "prefix" "vex")
+ (set_attr "memory" "none")
+ (set (attr "prefix")
+ (if_then_else
+ (match_test "TARGET_AVX512VL")
+ (const_string "evex")
+ (const_string "vex")))
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "vec_extract_lo_v32hi"
@@ -6366,8 +7051,8 @@
;; Modes handled by vec_extract patterns.
(define_mode_iterator VEC_EXTRACT_MODE
- [(V32QI "TARGET_AVX") V16QI
- (V16HI "TARGET_AVX") V8HI
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
@@ -6407,16 +7092,16 @@
(set_attr "mode" "V8DF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
-(define_insn "avx_unpckhpd256"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
+(define_insn "avx_unpckhpd256<mask_name>"
+ [(set (match_operand:V4DF 0 "register_operand" "=v")
(vec_select:V4DF
(vec_concat:V8DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DF 1 "register_operand" "v")
+ (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 1) (const_int 5)
(const_int 3) (const_int 7)])))]
- "TARGET_AVX"
- "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
@@ -6450,6 +7135,22 @@
})
+(define_insn "avx512vl_unpckhpd128_mask"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (vec_merge:V2DF
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 1) (const_int 3)]))
+ (match_operand:V2DF 3 "vector_move_operand" "0C")
+ (match_operand:QI 4 "register_operand" "Yk")))]
+ "TARGET_AVX512VL && TARGET_AVX512F"
+ "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V2DF")])
+
(define_expand "vec_interleave_highv2df"
[(set (match_operand:V2DF 0 "register_operand")
(vec_select:V2DF
@@ -6530,7 +7231,7 @@
(set_attr "mode" "V8DF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
-(define_expand "avx_movddup256"
+(define_expand "avx_movddup256<mask_name>"
[(set (match_operand:V4DF 0 "register_operand")
(vec_select:V4DF
(vec_concat:V8DF
@@ -6538,9 +7239,9 @@
(match_dup 1))
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))]
- "TARGET_AVX")
+ "TARGET_AVX && <mask_mode512bit_condition>")
-(define_expand "avx_unpcklpd256"
+(define_expand "avx_unpcklpd256<mask_name>"
[(set (match_operand:V4DF 0 "register_operand")
(vec_select:V4DF
(vec_concat:V8DF
@@ -6548,20 +7249,20 @@
(match_operand:V4DF 2 "nonimmediate_operand"))
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))]
- "TARGET_AVX")
+ "TARGET_AVX && <mask_mode512bit_condition>")
-(define_insn "*avx_unpcklpd256"
- [(set (match_operand:V4DF 0 "register_operand" "=x,x")
+(define_insn "*avx_unpcklpd256<mask_name>"
+ [(set (match_operand:V4DF 0 "register_operand" "=v,v")
(vec_select:V4DF
(vec_concat:V8DF
- (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
+ (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
+ (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))]
- "TARGET_AVX"
+ "TARGET_AVX && <mask_mode512bit_condition>"
"@
- vunpcklpd\t{%2, %1, %0|%0, %1, %2}
- vmovddup\t{%1, %0|%0, %1}"
+ vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
+ vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
@@ -6594,6 +7295,22 @@
operands[4] = gen_reg_rtx (V4DFmode);
})
+(define_insn "avx512vl_unpcklpd128_mask"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (vec_merge:V2DF
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 2)]))
+ (match_operand:V2DF 3 "vector_move_operand" "0C")
+ (match_operand:QI 4 "register_operand" "Yk")))]
+ "TARGET_AVX512VL && TARGET_AVX512F"
+ "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V2DF")])
+
(define_expand "vec_interleave_lowv2df"
[(set (match_operand:V2DF 0 "register_operand")
(vec_select:V2DF
@@ -6676,38 +7393,38 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "avx512f_scalef<mode><mask_name><round_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")]
+(define_insn "<avx512>_scalef<mode><mask_name><round_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_SCALEF))]
"TARGET_AVX512F"
"vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_expand "avx512f_vternlog<mode>_maskz"
- [(match_operand:VI48_512 0 "register_operand")
- (match_operand:VI48_512 1 "register_operand")
- (match_operand:VI48_512 2 "register_operand")
- (match_operand:VI48_512 3 "nonimmediate_operand")
+(define_expand "<avx512>_vternlog<mode>_maskz"
+ [(match_operand:VI48_AVX512VL 0 "register_operand")
+ (match_operand:VI48_AVX512VL 1 "register_operand")
+ (match_operand:VI48_AVX512VL 2 "register_operand")
+ (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
(match_operand:SI 4 "const_0_to_255_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand")]
"TARGET_AVX512F"
{
- emit_insn (gen_avx512f_vternlog<mode>_maskz_1 (
+ emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
operands[0], operands[1], operands[2], operands[3],
operands[4], CONST0_RTX (<MODE>mode), operands[5]));
DONE;
})
-(define_insn "avx512f_vternlog<mode><sd_maskz_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (unspec:VI48_512
- [(match_operand:VI48_512 1 "register_operand" "0")
- (match_operand:VI48_512 2 "register_operand" "v")
- (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
+(define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI48_AVX512VL
+ [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
+ (match_operand:VI48_AVX512VL 2 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG))]
"TARGET_AVX512F"
@@ -6716,13 +7433,13 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vternlog<mode>_mask"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (vec_merge:VI48_512
- (unspec:VI48_512
- [(match_operand:VI48_512 1 "register_operand" "0")
- (match_operand:VI48_512 2 "register_operand" "v")
- (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
+(define_insn "<avx512>_vternlog<mode>_mask"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VI48_AVX512VL
+ (unspec:VI48_AVX512VL
+ [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
+ (match_operand:VI48_AVX512VL 2 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG)
(match_dup 1)
@@ -6733,9 +7450,9 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_getexp<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+(define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_GETEXP))]
"TARGET_AVX512F"
"vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
@@ -6756,12 +7473,12 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
- (match_operand:SI 3 "const_0_to_255_operand")]
- UNSPEC_ALIGN))]
+(define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_ALIGN))]
"TARGET_AVX512F"
"valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
[(set_attr "prefix" "evex")
@@ -6799,28 +7516,28 @@
})
-(define_expand "avx512f_fixupimm<mode>_maskz<round_saeonly_expand_name>"
- [(match_operand:VF_512 0 "register_operand")
- (match_operand:VF_512 1 "register_operand")
- (match_operand:VF_512 2 "register_operand")
+(define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
+ [(match_operand:VF_AVX512VL 0 "register_operand")
+ (match_operand:VF_AVX512VL 1 "register_operand")
+ (match_operand:VF_AVX512VL 2 "register_operand")
(match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
(match_operand:SI 4 "const_0_to_255_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand")]
"TARGET_AVX512F"
{
- emit_insn (gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
+ emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
operands[0], operands[1], operands[2], operands[3],
operands[4], CONST0_RTX (<MODE>mode), operands[5]
<round_saeonly_expand_operand6>));
DONE;
})
-(define_insn "avx512f_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "0")
- (match_operand:VF_512 2 "register_operand" "v")
- (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+(define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF_AVX512VL 2 "register_operand" "v")
+ (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_FIXUPIMM))]
"TARGET_AVX512F"
@@ -6828,13 +7545,13 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_fixupimm<mode>_mask<round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (vec_merge:VF_512
- (unspec:VF_512
- [(match_operand:VF_512 1 "register_operand" "0")
- (match_operand:VF_512 2 "register_operand" "v")
- (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+(define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF_AVX512VL
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF_AVX512VL 2 "register_operand" "v")
+ (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_FIXUPIMM)
(match_dup 1)
@@ -6895,10 +7612,10 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "avx512f_rndscale<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+(define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_ROUND))]
"TARGET_AVX512F"
@@ -7031,7 +7748,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
-(define_expand "avx_shufpd256"
+(define_expand "avx_shufpd256<mask_expand4_name>"
[(match_operand:V4DF 0 "register_operand")
(match_operand:V4DF 1 "register_operand")
(match_operand:V4DF 2 "nonimmediate_operand")
@@ -7039,25 +7756,28 @@
"TARGET_AVX"
{
int mask = INTVAL (operands[3]);
- emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
- GEN_INT (mask & 1),
- GEN_INT (mask & 2 ? 5 : 4),
- GEN_INT (mask & 4 ? 3 : 2),
- GEN_INT (mask & 8 ? 7 : 6)));
+ emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
+ operands[1],
+ operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 5 : 4),
+ GEN_INT (mask & 4 ? 3 : 2),
+ GEN_INT (mask & 8 ? 7 : 6)
+ <mask_expand4_args>));
DONE;
})
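
  The expander above recodes the 4-bit vshufpd immediate into per-element
  selectors.  A worked example of what one immediate selects, sketched with
  the standard AVX intrinsic (demo_* wrapper illustrative only):

    #include <immintrin.h>

    /* Bit i of the immediate picks the low (0) or high (1) double of the
       i-th source within its lane; imm = 0x6 = 0b0110 gives {a0,b1,a3,b2}.  */
    __m256d
    demo_shufpd256 (__m256d a, __m256d b)
    {
      return _mm256_shuffle_pd (a, b, 0x6);
    }
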
-(define_insn "avx_shufpd256_1"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
+(define_insn "avx_shufpd256_1<mask_name>"
+ [(set (match_operand:V4DF 0 "register_operand" "=v")
(vec_select:V4DF
(vec_concat:V8DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DF 1 "register_operand" "v")
+ (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
(parallel [(match_operand 3 "const_0_to_1_operand")
(match_operand 4 "const_4_to_5_operand")
(match_operand 5 "const_2_to_3_operand")
(match_operand 6 "const_6_to_7_operand")])))]
- "TARGET_AVX"
+ "TARGET_AVX && <mask_mode512bit_condition>"
{
int mask;
mask = INTVAL (operands[3]);
@@ -7066,14 +7786,14 @@
mask |= (INTVAL (operands[6]) - 6) << 3;
operands[3] = GEN_INT (mask);
- return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
}
[(set_attr "type" "sseshuf")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
-(define_expand "sse2_shufpd"
+(define_expand "sse2_shufpd<mask_expand4_name>"
[(match_operand:V2DF 0 "register_operand")
(match_operand:V2DF 1 "register_operand")
(match_operand:V2DF 2 "nonimmediate_operand")
@@ -7081,25 +7801,51 @@
"TARGET_SSE2"
{
int mask = INTVAL (operands[3]);
- emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
- GEN_INT (mask & 1),
- GEN_INT (mask & 2 ? 3 : 2)));
+ emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
+ operands[2], GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 3 : 2)
+ <mask_expand4_args>));
DONE;
})
+(define_insn "sse2_shufpd_v2df_mask"
+ [(set (match_operand:V2DF 0 "register_operand" "=v")
+ (vec_merge:V2DF
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "v")
+ (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand")
+ (match_operand 4 "const_2_to_3_operand")]))
+ (match_operand:V2DF 5 "vector_move_operand" "0C")
+ (match_operand:QI 6 "register_operand" "Yk")))]
+ "TARGET_AVX512VL"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 2) << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{6%}%N5, %1, %2, %3}";
+}
+ [(set_attr "type" "sseshuf")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V2DF")])
+
;; punpcklqdq and punpckhqdq are shorter than shufpd.
-(define_insn "avx2_interleave_highv4di"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_interleave_highv4di<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(vec_select:V4DI
(vec_concat:V8DI
- (match_operand:V4DI 1 "register_operand" "x")
- (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DI 1 "register_operand" "v")
+ (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 1)
(const_int 5)
(const_int 3)
(const_int 7)])))]
- "TARGET_AVX2"
- "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
@@ -7120,36 +7866,36 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "vec_interleave_highv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv2di<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,v")
(vec_select:V2DI
(vec_concat:V4DI
- (match_operand:V2DI 1 "register_operand" "0,x")
- (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V2DI 1 "register_operand" "0,v")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 1)
(const_int 3)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpckhqdq\t{%2, %0|%0, %2}
- vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+ vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_lowv4di"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_interleave_lowv4di<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(vec_select:V4DI
(vec_concat:V8DI
- (match_operand:V4DI 1 "register_operand" "x")
- (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DI 1 "register_operand" "v")
+ (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0)
(const_int 4)
(const_int 2)
(const_int 6)])))]
- "TARGET_AVX2"
- "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
@@ -7170,18 +7916,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "vec_interleave_lowv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_lowv2di<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,v")
(vec_select:V2DI
(vec_concat:V4DI
- (match_operand:V2DI 1 "register_operand" "0,x")
- (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V2DI 1 "register_operand" "0,v")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 0)
(const_int 2)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpcklqdq\t{%2, %0|%0, %2}
- vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+ vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
@@ -7477,24 +8223,24 @@
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
(set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
-(define_insn "vec_dupv2df"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+(define_insn "vec_dupv2df<mask_name>"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,v")
(vec_duplicate:V2DF
- (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
- "TARGET_SSE2"
+ (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
unpcklpd\t%0, %0
- %vmovddup\t{%1, %0|%0, %1}"
+ %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "isa" "noavx,sse3")
(set_attr "type" "sselog1")
(set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "V2DF,DF")])
(define_insn "*vec_concatv2df"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
+ [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x")
(vec_concat:V2DF
- (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
- (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
+ (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
+ (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))]
"TARGET_SSE"
"@
unpcklpd\t{%2, %0|%0, %2}
@@ -7521,48 +8267,513 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
+(define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
(define_mode_attr pmov_src_mode
[(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
(define_mode_attr pmov_src_lower
[(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
-(define_mode_attr pmov_suff
+(define_mode_attr pmov_suff_1
[(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
- [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
- (any_truncate:PMOV_DST_MODE
+ [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
+ (any_truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
"TARGET_AVX512F"
- "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
+ "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
- [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
- (vec_merge:PMOV_DST_MODE
- (any_truncate:PMOV_DST_MODE
+ [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
+ (vec_merge:PMOV_DST_MODE_1
+ (any_truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
- (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
+ (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
"TARGET_AVX512F"
- "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
- [(set (match_operand:PMOV_DST_MODE 0 "memory_operand")
- (vec_merge:PMOV_DST_MODE
- (any_truncate:PMOV_DST_MODE
+ [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
+ (vec_merge:PMOV_DST_MODE_1
+ (any_truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand"))
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
"TARGET_AVX512F")
+(define_mode_iterator PMOV_DST_MODE_2
+ [(V32QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512BW && TARGET_AVX512VL")
+ (V8HI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
+(define_mode_attr pmov_suff_2
+ [(V32QI "wb") (V16QI "wb") (V8HI "dw") (V4SI "qd")])
+
+(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
+ [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
+ (any_truncate:PMOV_DST_MODE_2
+ (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
+ ""
+ "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
+ [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
+ (vec_merge:PMOV_DST_MODE_2
+ (any_truncate:PMOV_DST_MODE_2
+ (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
+ (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
+ ""
+ "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "<avx512>_<code><ssedoublemodelower><mode>2_store_mask"
+ [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
+ (vec_merge:PMOV_DST_MODE_2
+ (any_truncate:PMOV_DST_MODE_2
+ (match_operand:<ssedoublemode> 1 "register_operand"))
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+ "TARGET_AVX512VL")
+
+(define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
+(define_mode_attr pmov_dst_3
+ [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
+(define_mode_attr pmov_dst_zeroed_3
+ [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
+(define_mode_attr pmov_suff_3
+ [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
+
+(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (any_truncate:<pmov_dst_3>
+ (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
+ (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code>v2div2qi2_store"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (any_truncate:V2QI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V14QI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2qi2_mask"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (vec_merge:V2QI
+ (any_truncate:V2QI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2QI
+ (match_operand:V16QI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V14QI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2qi2_store_mask"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (vec_merge:V2QI
+ (any_truncate:V2QI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2QI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V14QI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code><mode>v4qi2_store"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (any_truncate:V4QI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V12QI
+ (match_dup 0)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v4qi2_mask"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (vec_merge:V4QI
+ (any_truncate:V4QI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V4QI
+ (match_operand:V16QI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V12QI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v4qi2_store_mask"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (vec_merge:V4QI
+ (any_truncate:V4QI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V4QI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V12QI
+ (match_dup 0)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code><mode>v8qi2_store"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (any_truncate:V8QI
+ (match_operand:VI2_128_4_256 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v8qi2_mask"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (vec_merge:V8QI
+ (any_truncate:V8QI
+ (match_operand:VI2_128_4_256 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V8QI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v8qi2_store_mask"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (vec_merge:V8QI
+ (any_truncate:V8QI
+ (match_operand:VI2_128_4_256 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
+(define_mode_attr pmov_dst_4
+ [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
+(define_mode_attr pmov_dst_zeroed_4
+ [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
+(define_mode_attr pmov_suff_4
+ [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
+
+(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (vec_concat:V8HI
+ (any_truncate:<pmov_dst_4>
+ (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
+ (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code><mode>v4hi2_store"
+ [(set (match_operand:V8HI 0 "memory_operand" "=m")
+ (vec_concat:V8HI
+ (any_truncate:V4HI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V4HI
+ (match_dup 0)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v4hi2_mask"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (vec_concat:V8HI
+ (vec_merge:V4HI
+ (any_truncate:V4HI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V4HI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code><mode>v4hi2_store_mask"
+ [(set (match_operand:V8HI 0 "memory_operand" "=m")
+ (vec_concat:V8HI
+ (vec_merge:V4HI
+ (any_truncate:V4HI
+ (match_operand:VI4_128_8_256 1 "register_operand" "v"))
+ (vec_select:V4HI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V4HI
+ (match_dup 0)
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code>v2div2hi2_store"
+ [(set (match_operand:V8HI 0 "memory_operand" "=m")
+ (vec_concat:V8HI
+ (any_truncate:V2HI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V6HI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2hi2_mask"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (vec_concat:V8HI
+ (vec_merge:V2HI
+ (any_truncate:V2HI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2HI
+ (match_operand:V8HI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V6HI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2hi2_store_mask"
+ [(set (match_operand:V8HI 0 "memory_operand" "=m")
+ (vec_concat:V8HI
+ (vec_merge:V2HI
+ (any_truncate:V2HI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2HI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V6HI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code>v2div2si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (any_truncate:V2SI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (match_operand:V2SI 2 "const0_operand")))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx512vl_<code>v2div2si2_store"
+ [(set (match_operand:V4SI 0 "memory_operand" "=m")
+ (vec_concat:V4SI
+ (any_truncate:V2SI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2SI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2si2_mask"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (any_truncate:V2SI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx512vl_<code>v2div2si2_store_mask"
+ [(set (match_operand:V4SI 0 "memory_operand" "=m")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (any_truncate:V2SI
+ (match_operand:V2DI 1 "register_operand" "v"))
+ (vec_select:V2SI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (vec_select:V2SI
+ (match_dup 0)
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX512VL"
+ "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
(define_insn "*avx512f_<code>v8div16qi2"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(vec_concat:V16QI
@@ -7682,61 +8893,65 @@
(set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
+(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
[(set (match_operand:VI12_AVX2 0 "register_operand")
(sat_plusminus:VI12_AVX2
(match_operand:VI12_AVX2 1 "nonimmediate_operand")
(match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
+(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
[(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
(sat_plusminus:VI12_AVX2
(match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
(match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "TI")])
-(define_expand "mul<mode>3"
+(define_expand "mul<mode>3<mask_name>"
[(set (match_operand:VI1_AVX2 0 "register_operand")
(mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
(match_operand:VI1_AVX2 2 "register_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2&& <mask_mode512bit_condition>"
{
ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
DONE;
})
-(define_expand "mul<mode>3"
+(define_expand "mul<mode>3<mask_name>"
[(set (match_operand:VI2_AVX2 0 "register_operand")
(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
(match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
-(define_insn "*mul<mode>3"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
- (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
- (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+(define_insn "*mul<mode>3<mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
pmullw\t{%2, %0|%0, %2}
- vpmullw\t{%2, %1, %0|%0, %1, %2}"
+ vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "<s>mul<mode>3_highpart"
+(define_expand "<s>mul<mode>3_highpart<mask_name>"
[(set (match_operand:VI2_AVX2 0 "register_operand")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
@@ -7746,23 +8961,25 @@
(any_extend:<ssedoublemode>
(match_operand:VI2_AVX2 2 "nonimmediate_operand")))
(const_int 16))))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
-(define_insn "*<s>mul<mode>3_highpart"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+(define_insn "*<s>mul<mode>3_highpart<mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
+ (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
(any_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
(const_int 16))))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
pmulh<u>w\t{%2, %0|%0, %2}
- vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
+ vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
@@ -7814,7 +9031,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_expand "vec_widen_umult_even_v8si"
+(define_expand "vec_widen_umult_even_v8si<mask_name>"
[(set (match_operand:V4DI 0 "register_operand")
(mult:V4DI
(zero_extend:V4DI
@@ -7827,29 +9044,31 @@
(match_operand:V8SI 2 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))))]
- "TARGET_AVX2"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
-(define_insn "*vec_widen_umult_even_v8si"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "*vec_widen_umult_even_v8si<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(mult:V4DI
(zero_extend:V4DI
(vec_select:V4SI
- (match_operand:V8SI 1 "nonimmediate_operand" "%x")
+ (match_operand:V8SI 1 "nonimmediate_operand" "%v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(zero_extend:V4DI
(vec_select:V4SI
- (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))))]
- "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
- "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2
+ && ix86_binary_operator_ok (MULT, V8SImode, operands)
+ && <mask_mode512bit_condition>"
+ "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_expand "vec_widen_umult_even_v4si"
+(define_expand "vec_widen_umult_even_v4si<mask_name>"
[(set (match_operand:V2DI 0 "register_operand")
(mult:V2DI
(zero_extend:V2DI
@@ -7860,28 +9079,30 @@
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
-(define_insn "*vec_widen_umult_even_v4si"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+(define_insn "*vec_widen_umult_even_v4si<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,v")
(mult:V2DI
(zero_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
(parallel [(const_int 0) (const_int 2)])))
(zero_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (MULT, V4SImode, operands)
+ && <mask_mode512bit_condition>"
"@
pmuludq\t{%2, %0|%0, %2}
- vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "TI")])
(define_expand "vec_widen_smult_even_v16si<mask_name>"
@@ -7929,7 +9150,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_expand "vec_widen_smult_even_v8si"
+(define_expand "vec_widen_smult_even_v8si<mask_name>"
[(set (match_operand:V4DI 0 "register_operand")
(mult:V4DI
(sign_extend:V4DI
@@ -7942,30 +9163,31 @@
(match_operand:V8SI 2 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))))]
- "TARGET_AVX2"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
-(define_insn "*vec_widen_smult_even_v8si"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "*vec_widen_smult_even_v8si<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(mult:V4DI
(sign_extend:V4DI
(vec_select:V4SI
- (match_operand:V8SI 1 "nonimmediate_operand" "x")
+ (match_operand:V8SI 1 "nonimmediate_operand" "v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4DI
(vec_select:V4SI
- (match_operand:V8SI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))))]
- "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
- "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2
+ && ix86_binary_operator_ok (MULT, V8SImode, operands)"
+ "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_expand "sse4_1_mulv2siv2di3"
+(define_expand "sse4_1_mulv2siv2di3<mask_name>"
[(set (match_operand:V2DI 0 "register_operand")
(mult:V2DI
(sign_extend:V2DI
@@ -7976,24 +9198,26 @@
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand")
(parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_SSE4_1"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
-(define_insn "*sse4_1_mulv2siv2di3"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+(define_insn "*sse4_1_mulv2siv2di3<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,v")
(mult:V2DI
(sign_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "TARGET_SSE4_1
+ && ix86_binary_operator_ok (MULT, V4SImode, operands)
+ && <mask_mode512bit_condition>"
"@
pmuldq\t{%2, %0|%0, %2}
- vpmuldq\t{%2, %1, %0|%0, %1, %2}"
+ vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
@@ -8001,6 +9225,18 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
+ [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
+ (unspec:<sseunpackmode>
+ [(match_operand:VI2_AVX2 1 "register_operand" "v")
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
+ UNSPEC_PMADDWD512))]
+ "TARGET_AVX512BW && <mask_mode512bit_condition>"
+ "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
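  The new 512-bit vpmaddwd pattern multiplies adjacent signed 16-bit pairs
  and sums each pair into a 32-bit lane; a minimal sketch via the AVX512BW
  intrinsic (demo_* wrapper illustrative only):

    #include <immintrin.h>

    /* vpmaddwd: dst.i32[i] = a.i16[2i]*b.i16[2i] + a.i16[2i+1]*b.i16[2i+1].  */
    __m512i
    demo_pmaddwd (__m512i a, __m512i b)
    {
      return _mm512_madd_epi16 (a, b);
    }
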
(define_expand "avx2_pmaddwd"
[(set (match_operand:V8SI 0 "register_operand")
(plus:V8SI
@@ -8132,6 +9368,17 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
+(define_insn "avx512dq_mul<mode>3<mask_name>"
+ [(set (match_operand:VI8 0 "register_operand" "=v")
+ (mult:VI8
+ (match_operand:VI8 1 "register_operand" "v")
+ (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512DQ && <mask_mode512bit_condition>"
+ "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
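  vpmullq is the AVX512DQ full-width low multiply on 64-bit elements; in
  intrinsics terms, a minimal sketch (demo_* wrapper illustrative only):

    #include <immintrin.h>

    /* vpmullq: low 64 bits of each 64x64-bit product.  */
    __m512i
    demo_mullq (__m512i a, __m512i b)
    {
      return _mm512_mullo_epi64 (a, b);
    }
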
(define_expand "mul<mode>3<mask_name>"
[(set (match_operand:VI4_AVX512F 0 "register_operand")
(mult:VI4_AVX512F
@@ -8230,6 +9477,9 @@
DONE;
})
+(define_mode_attr SDOT_PMADD_SUF
+ [(V32HI "512v32hi") (V16HI "") (V8HI "")])
+
(define_expand "sdot_prod<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI2_AVX2 1 "register_operand")
@@ -8238,7 +9488,7 @@
"TARGET_SSE2"
{
rtx t = gen_reg_rtx (<sseunpackmode>mode);
- emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
+ emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_PLUS (<sseunpackmode>mode,
operands[3], t)));
@@ -8290,15 +9540,15 @@
DONE;
})
-(define_insn "ashr<mode>3"
- [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
+(define_insn "ashr<mode>3<mask_name>"
+ [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,v")
(ashiftrt:VI24_AVX2
- (match_operand:VI24_AVX2 1 "register_operand" "0,x")
- (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
- "TARGET_SSE2"
+ (match_operand:VI24_AVX2 1 "register_operand" "0,v")
+ (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
psra<ssemodesuffix>\t{%2, %0|%0, %2}
- vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseishft")
(set (attr "length_immediate")
@@ -8306,9 +9556,37 @@
(const_string "1")
(const_string "0")))
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "ashrv4di3<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v,v")
+ (ashiftrt:V4DI
+ (match_operand:V4DI 1 "nonimmediate_operand" "v,vm")
+ (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+ "TARGET_AVX512VL"
+ "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "OI")])
+
+(define_insn "ashrv2di3<mask_name>_1"
+ [(set (match_operand:V2DI 0 "register_operand" "=v,v")
+ (ashiftrt:V2DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
+ (match_operand:DI 2 "nonmemory_operand" "v,N")))]
+ "TARGET_AVX512VL"
+ "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "TI")])
+
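  Arithmetic right shifts on 64-bit elements (vpsraq) are new with AVX-512;
  a minimal sketch of the 256-bit immediate form, assuming the published
  AVX512VL intrinsic name (demo_* wrapper illustrative only):

    #include <immintrin.h>

    /* vpsraq: sign-propagating shift of each 64-bit lane.  */
    __m256i
    demo_sraq (__m256i a)
    {
      return _mm256_srai_epi64 (a, 3);
    }
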
(define_insn "ashr<mode>3<mask_name>"
[(set (match_operand:VI48_512 0 "register_operand" "=v,v")
(ashiftrt:VI48_512
@@ -8323,15 +9601,16 @@
(const_string "0")))
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<shift_insn><mode>3"
- [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
+(define_insn "<shift_insn><mode>3<mask_name>"
+ [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,v")
(any_lshift:VI248_AVX2
- (match_operand:VI248_AVX2 1 "register_operand" "0,x")
- (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
- "TARGET_SSE2"
+ (match_operand:VI248_AVX2 1 "register_operand" "0,v")
+ (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
+ "TARGET_SSE2
+ && (!<mask_applied> || TARGET_AVX512VL || <MODE>mode == V32HImode)"
"@
p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseishft")
(set (attr "length_immediate")
@@ -8373,9 +9652,9 @@
})
(define_insn "<sse2_avx2>_ashl<mode>3"
- [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
(ashift:VIMAX_AVX2
- (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
+ (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
"TARGET_SSE2"
{
@@ -8412,9 +9691,9 @@
})
(define_insn "<sse2_avx2>_lshr<mode>3"
- [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
+ [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
(lshiftrt:VIMAX_AVX2
- (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
+ (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
"TARGET_SSE2"
{
@@ -8438,20 +9717,20 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_<rotate>v<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (any_rotate:VI48_512
- (match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
+(define_insn "<avx512>_<rotate>v<mode><mask_name>"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (any_rotate:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
"vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_<rotate><mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (any_rotate:VI48_512
- (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
+(define_insn "<avx512>_<rotate><mode><mask_name>"
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (any_rotate:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")))]
"TARGET_AVX512F"
"vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
@@ -8459,18 +9738,18 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "<code><mode>3<mask_name><round_name>"
- [(set (match_operand:VI124_256_48_512 0 "register_operand")
- (maxmin:VI124_256_48_512
- (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>")
- (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>")))]
+ [(set (match_operand:VI124_256_1248_512 0 "register_operand")
+ (maxmin:VI124_256_1248_512
+ (match_operand:VI124_256_1248_512 1 "nonimmediate_operand")
+ (match_operand:VI124_256_1248_512 2 "nonimmediate_operand")))]
"TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*avx2_<code><mode>3<mask_name><round_name>"
- [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
- (maxmin:VI124_256_48_512
- (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>" "%v")
- (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>" "<round_constraint>")))]
+ [(set (match_operand:VI124_256_1248_512 0 "register_operand" "=v")
+ (maxmin:VI124_256_1248_512
+ (match_operand:VI124_256_1248_512 1 "nonimmediate_operand" "%v")
+ (match_operand:VI124_256_1248_512 2 "nonimmediate_operand" "<round_constraint>")))]
"TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
&& <mask_mode512bit_condition> && <round_mode512bit_condition>"
"vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
@@ -8479,6 +9758,17 @@
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
+(define_insn "<mask_codefor><code><mode>3<mask_name>"
+ [(set (match_operand:VI128_256 0 "register_operand" "=v")
+ (maxmin:VI128_256
+ (match_operand:VI128_256 1 "register_operand" "v")
+ (match_operand:VI128_256 2 "nonimmediate_operand" "vm")))]
+ ""
+ "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "<code><mode>3"
[(set (match_operand:VI8_AVX2 0 "register_operand")
(maxmin:VI8_AVX2
@@ -8490,27 +9780,32 @@
rtx xops[6];
bool ok;
- xops[0] = operands[0];
-
- if (<CODE> == SMAX || <CODE> == UMAX)
- {
- xops[1] = operands[1];
- xops[2] = operands[2];
- }
+ if (TARGET_AVX512VL)
+ ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
else
{
- xops[1] = operands[2];
- xops[2] = operands[1];
- }
+ xops[0] = operands[0];
- code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
+ if (<CODE> == SMAX || <CODE> == UMAX)
+ {
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ }
+ else
+ {
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ }
- xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
- xops[4] = operands[1];
- xops[5] = operands[2];
+ code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
- ok = ix86_expand_int_vcond (xops);
- gcc_assert (ok);
+ xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ }
DONE;
})
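+;; Illustrative note: SSE4.2/AVX2 have no 64-bit vector max/min
+;; instruction, so without AVX512VL the expander above lowers the
+;; operation to a compare-and-blend via ix86_expand_int_vcond, roughly
+;;
+;;   max = (a > b) ? a : b;	/* vpcmpgtq + blend (sketch).  */
+;;
+;; with GT or GTU chosen by signedness and the operands swapped for
+;; min.  With AVX512VL the straight vpmax/vpmin pattern above is used.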
@@ -8553,15 +9848,17 @@
}
})
-(define_insn "*sse4_1_<code><mode>3"
- [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
+(define_insn "*sse4_1_<code><mode>3<mask_name>"
+ [(set (match_operand:VI14_128 0 "register_operand" "=x,v")
(smaxmin:VI14_128
- (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
- (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ (match_operand:VI14_128 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VI14_128 2 "nonimmediate_operand" "xm,vm")))]
+ "TARGET_SSE4_1
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "prefix_extra" "1,*")
@@ -8634,15 +9931,17 @@
}
})
-(define_insn "*sse4_1_<code><mode>3"
- [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
+(define_insn "*sse4_1_<code><mode>3<mask_name>"
+ [(set (match_operand:VI24_128 0 "register_operand" "=x,v")
(umaxmin:VI24_128
- (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
- (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
- "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ (match_operand:VI24_128 1 "nonimmediate_operand" "%0,v")
+ (match_operand:VI24_128 2 "nonimmediate_operand" "xm,vm")))]
+ "TARGET_SSE4_1
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "prefix_extra" "1,*")
@@ -8691,20 +9990,20 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_expand "avx512f_eq<mode>3<mask_scalar_merge_name>"
+(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand")
- (match_operand:VI48_512 2 "nonimmediate_operand")]
+ [(match_operand:VI_AVX512VL 1 "register_operand")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand")]
UNSPEC_MASKED_EQ))]
"TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
-(define_insn "avx512f_eq<mode>3<mask_scalar_merge_name>_1"
+(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand" "%v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VI_AVX512VL 1 "register_operand" "%v")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_MASKED_EQ))]
"TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
"vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
@@ -8787,11 +10086,11 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "avx512f_gt<mode>3<mask_scalar_merge_name>"
+(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
"TARGET_AVX512F"
"vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "type" "ssecmp")
@@ -8952,7 +10251,8 @@
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
(V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
- (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V32HI "TARGET_AVX512BW")])
(define_expand "vec_perm<mode>"
[(match_operand:VEC_PERM_AVX2 0 "register_operand")
@@ -8973,7 +10273,8 @@
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
- (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V32HI "TARGET_AVX512BW")])
(define_expand "vec_perm_const<mode>"
[(match_operand:VEC_PERM_CONST 0 "register_operand")
@@ -9031,16 +10332,31 @@
{
case MODE_XI:
gcc_assert (TARGET_AVX512F);
-
- tmp = "pandn<ssemodesuffix>";
- break;
-
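+      /* FALLTHRU */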
case MODE_OI:
- gcc_assert (TARGET_AVX2);
+ gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
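+      /* FALLTHRU */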
case MODE_TI:
- gcc_assert (TARGET_SSE2);
-
- tmp = "pandn";
+ gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
+ switch (<MODE>mode)
+ {
+ case V16SImode:
+ case V8DImode:
+ if (TARGET_AVX512F)
+ {
+ tmp = "pandn<ssemodesuffix>";
+ break;
+ }
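+	  /* FALLTHRU */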
+ case V8SImode:
+ case V4DImode:
+ case V4SImode:
+ case V2DImode:
+ if (TARGET_AVX512VL)
+ {
+ tmp = "pandn<ssemodesuffix>";
+ break;
+ }
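+	  /* FALLTHRU */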
+ default:
+ tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
+ }
break;
case MODE_V16SF:
@@ -9125,16 +10441,31 @@
{
case MODE_XI:
gcc_assert (TARGET_AVX512F);
-
- tmp = "p<logic><ssemodesuffix>";
- break;
-
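+      /* FALLTHRU */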
case MODE_OI:
- gcc_assert (TARGET_AVX2);
+ gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
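+      /* FALLTHRU */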
case MODE_TI:
- gcc_assert (TARGET_SSE2);
-
- tmp = "p<logic>";
+ gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
+ switch (<MODE>mode)
+ {
+ case V16SImode:
+ case V8DImode:
+ if (TARGET_AVX512F)
+ {
+ tmp = "p<logic><ssemodesuffix>";
+ break;
+ }
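+	  /* FALLTHRU */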
+ case V8SImode:
+ case V4DImode:
+ case V4SImode:
+ case V2DImode:
+ if (TARGET_AVX512VL)
+ {
+ tmp = "p<logic><ssemodesuffix>";
+ break;
+ }
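+	  /* FALLTHRU */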
+ default:
+ tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>";
+ }
break;
case MODE_V16SF:
@@ -9192,22 +10523,22 @@
]
(const_string "<sseinsnmode>")))])
-(define_insn "avx512f_testm<mode>3<mask_scalar_merge_name>"
+(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_TESTM))]
"TARGET_AVX512F"
"vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_testnm<mode>3<mask_scalar_merge_name>"
+(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
(unspec:<avx512fmaskmode>
- [(match_operand:VI48_512 1 "register_operand" "v")
- (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VI_AVX512VL 1 "register_operand" "v")
+ (match_operand:VI_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_TESTNM))]
"TARGET_AVX512F"
"vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
@@ -9232,63 +10563,107 @@
DONE;
})
-(define_insn "<sse2_avx2>_packsswb"
- [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+(define_insn "<sse2_avx2>_packsswb<mask_name>"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,v")
(vec_concat:VI1_AVX2
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
- "TARGET_SSE2"
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
packsswb\t{%2, %0|%0, %2}
- vpacksswb\t{%2, %1, %0|%0, %1, %2}"
+ vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<sse2_avx2>_packssdw"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+(define_insn "<sse2_avx2>_packssdw<mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
(vec_concat:VI2_AVX2
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
- "TARGET_SSE2"
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
packssdw\t{%2, %0|%0, %2}
- vpackssdw\t{%2, %1, %0|%0, %1, %2}"
+ vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<sse2_avx2>_packuswb"
- [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+(define_insn "<sse2_avx2>_packuswb<mask_name>"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,v")
(vec_concat:VI1_AVX2
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
- "TARGET_SSE2"
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
packuswb\t{%2, %0|%0, %2}
- vpackuswb\t{%2, %1, %0|%0, %1, %2}"
+ vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx2_interleave_highv32qi"
- [(set (match_operand:V32QI 0 "register_operand" "=x")
+(define_insn "avx512bw_interleave_highv64qi<mask_name>"
+ [(set (match_operand:V64QI 0 "register_operand" "=v")
+ (vec_select:V64QI
+ (vec_concat:V128QI
+ (match_operand:V64QI 1 "register_operand" "v")
+ (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 8) (const_int 72)
+ (const_int 9) (const_int 73)
+ (const_int 10) (const_int 74)
+ (const_int 11) (const_int 75)
+ (const_int 12) (const_int 76)
+ (const_int 13) (const_int 77)
+ (const_int 14) (const_int 78)
+ (const_int 15) (const_int 79)
+ (const_int 24) (const_int 88)
+ (const_int 25) (const_int 89)
+ (const_int 26) (const_int 90)
+ (const_int 27) (const_int 91)
+ (const_int 28) (const_int 92)
+ (const_int 29) (const_int 93)
+ (const_int 30) (const_int 94)
+ (const_int 31) (const_int 95)
+ (const_int 40) (const_int 104)
+ (const_int 41) (const_int 105)
+ (const_int 42) (const_int 106)
+ (const_int 43) (const_int 107)
+ (const_int 44) (const_int 108)
+ (const_int 45) (const_int 109)
+ (const_int 46) (const_int 110)
+ (const_int 47) (const_int 111)
+ (const_int 56) (const_int 120)
+ (const_int 57) (const_int 121)
+ (const_int 58) (const_int 122)
+ (const_int 59) (const_int 123)
+ (const_int 60) (const_int 124)
+ (const_int 61) (const_int 125)
+ (const_int 62) (const_int 126)
+ (const_int 63) (const_int 127)])))]
+ "TARGET_AVX512BW"
+ "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
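+;; Illustrative note: as with the 256-bit form below, vpunpckhbw
+;; interleaves within each 128-bit lane, which is why the selector
+;; above draws from bytes 8-15, 24-31, 40-47 and 56-63 of each source
+;; rather than from the top half of the whole vector (the low unpack
+;; further below starts at byte 0 of each lane).  A sketch of the
+;; index computation (hypothetical helper, not part of the port):
+;;
+;;   /* Concatenated-source index of result byte I (0 <= I < 64);
+;;      odd I selects from operand 2.  */
+;;   static int
+;;   unpckhbw_idx (int i)
+;;   {
+;;     return (i / 16) * 16 + 8 + (i % 16) / 2 + ((i & 1) ? 64 : 0);
+;;   }
+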
+(define_insn "avx2_interleave_highv32qi<mask_name>"
+ [(set (match_operand:V32QI 0 "register_operand" "=v")
(vec_select:V32QI
(vec_concat:V64QI
- (match_operand:V32QI 1 "register_operand" "x")
- (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V32QI 1 "register_operand" "v")
+ (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 8) (const_int 40)
(const_int 9) (const_int 41)
(const_int 10) (const_int 42)
@@ -9305,18 +10680,18 @@
(const_int 29) (const_int 61)
(const_int 30) (const_int 62)
(const_int 31) (const_int 63)])))]
- "TARGET_AVX2"
- "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "OI")])
-(define_insn "vec_interleave_highv16qi"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv16qi<mask_name>"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,v")
(vec_select:V16QI
(vec_concat:V32QI
- (match_operand:V16QI 1 "register_operand" "0,x")
- (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V16QI 1 "register_operand" "0,v")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 8) (const_int 24)
(const_int 9) (const_int 25)
(const_int 10) (const_int 26)
@@ -9325,22 +10700,66 @@
(const_int 13) (const_int 29)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpckhbw\t{%2, %0|%0, %2}
- vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
+ vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_lowv32qi"
- [(set (match_operand:V32QI 0 "register_operand" "=x")
+(define_insn "avx512bw_interleave_lowv64qi<mask_name>"
+ [(set (match_operand:V64QI 0 "register_operand" "=v")
+ (vec_select:V64QI
+ (vec_concat:V128QI
+ (match_operand:V64QI 1 "register_operand" "v")
+ (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 64)
+ (const_int 1) (const_int 65)
+ (const_int 2) (const_int 66)
+ (const_int 3) (const_int 67)
+ (const_int 4) (const_int 68)
+ (const_int 5) (const_int 69)
+ (const_int 6) (const_int 70)
+ (const_int 7) (const_int 71)
+ (const_int 16) (const_int 80)
+ (const_int 17) (const_int 81)
+ (const_int 18) (const_int 82)
+ (const_int 19) (const_int 83)
+ (const_int 20) (const_int 84)
+ (const_int 21) (const_int 85)
+ (const_int 22) (const_int 86)
+ (const_int 23) (const_int 87)
+ (const_int 32) (const_int 96)
+ (const_int 33) (const_int 97)
+ (const_int 34) (const_int 98)
+ (const_int 35) (const_int 99)
+ (const_int 36) (const_int 100)
+ (const_int 37) (const_int 101)
+ (const_int 38) (const_int 102)
+ (const_int 39) (const_int 103)
+ (const_int 48) (const_int 112)
+ (const_int 49) (const_int 113)
+ (const_int 50) (const_int 114)
+ (const_int 51) (const_int 115)
+ (const_int 52) (const_int 116)
+ (const_int 53) (const_int 117)
+ (const_int 54) (const_int 118)
+ (const_int 55) (const_int 119)])))]
+ "TARGET_AVX512BW"
+ "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "avx2_interleave_lowv32qi<mask_name>"
+ [(set (match_operand:V32QI 0 "register_operand" "=v")
(vec_select:V32QI
(vec_concat:V64QI
- (match_operand:V32QI 1 "register_operand" "x")
- (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V32QI 1 "register_operand" "v")
+ (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0) (const_int 32)
(const_int 1) (const_int 33)
(const_int 2) (const_int 34)
@@ -9357,18 +10776,18 @@
(const_int 21) (const_int 53)
(const_int 22) (const_int 54)
(const_int 23) (const_int 55)])))]
- "TARGET_AVX2"
- "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_vex")
(set_attr "mode" "OI")])
-(define_insn "vec_interleave_lowv16qi"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_lowv16qi<mask_name>"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,v")
(vec_select:V16QI
(vec_concat:V32QI
- (match_operand:V16QI 1 "register_operand" "0,x")
- (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V16QI 1 "register_operand" "0,v")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -9377,22 +10796,50 @@
(const_int 5) (const_int 21)
(const_int 6) (const_int 22)
(const_int 7) (const_int 23)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpcklbw\t{%2, %0|%0, %2}
- vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
+ vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_highv16hi"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+(define_insn "avx512bw_interleave_highv32hi<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (vec_select:V32HI
+ (vec_concat:V64HI
+ (match_operand:V32HI 1 "register_operand" "v")
+ (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 4) (const_int 36)
+ (const_int 5) (const_int 37)
+ (const_int 6) (const_int 38)
+ (const_int 7) (const_int 39)
+ (const_int 12) (const_int 44)
+ (const_int 13) (const_int 45)
+ (const_int 14) (const_int 46)
+ (const_int 15) (const_int 47)
+ (const_int 20) (const_int 52)
+ (const_int 21) (const_int 53)
+ (const_int 22) (const_int 54)
+ (const_int 23) (const_int 55)
+ (const_int 28) (const_int 60)
+ (const_int 29) (const_int 61)
+ (const_int 30) (const_int 62)
+ (const_int 31) (const_int 63)])))]
+ "TARGET_AVX512BW"
+ "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "avx2_interleave_highv16hi<mask_name>"
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(vec_select:V16HI
(vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "x")
- (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V16HI 1 "register_operand" "v")
+ (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 4) (const_int 20)
(const_int 5) (const_int 21)
(const_int 6) (const_int 22)
@@ -9401,38 +10848,66 @@
(const_int 13) (const_int 29)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_AVX2"
- "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "vec_interleave_highv8hi"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv8hi<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,v")
(vec_select:V8HI
(vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,x")
- (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V8HI 1 "register_operand" "0,v")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpckhwd\t{%2, %0|%0, %2}
- vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
+ vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_lowv16hi"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (vec_select:V32HI
+ (vec_concat:V64HI
+ (match_operand:V32HI 1 "register_operand" "v")
+ (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
+ (parallel [(const_int 0) (const_int 32)
+ (const_int 1) (const_int 33)
+ (const_int 2) (const_int 34)
+ (const_int 3) (const_int 35)
+ (const_int 8) (const_int 40)
+ (const_int 9) (const_int 41)
+ (const_int 10) (const_int 42)
+ (const_int 11) (const_int 43)
+ (const_int 16) (const_int 48)
+ (const_int 17) (const_int 49)
+ (const_int 18) (const_int 50)
+ (const_int 19) (const_int 51)
+ (const_int 24) (const_int 56)
+ (const_int 25) (const_int 57)
+ (const_int 26) (const_int 58)
+ (const_int 27) (const_int 59)])))]
+ "TARGET_AVX512BW"
+ "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "avx2_interleave_lowv16hi<mask_name>"
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(vec_select:V16HI
(vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "x")
- (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V16HI 1 "register_operand" "v")
+ (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -9441,46 +10916,46 @@
(const_int 9) (const_int 25)
(const_int 10) (const_int 26)
(const_int 11) (const_int 27)])))]
- "TARGET_AVX2"
- "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "vec_interleave_lowv8hi"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_lowv8hi<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,v")
(vec_select:V8HI
(vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,x")
- (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V8HI 1 "register_operand" "0,v")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
(const_int 3) (const_int 11)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpcklwd\t{%2, %0|%0, %2}
- vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
+ vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_highv8si"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+(define_insn "avx2_interleave_highv8si<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(vec_select:V8SI
(vec_concat:V16SI
- (match_operand:V8SI 1 "register_operand" "x")
- (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SI 1 "register_operand" "v")
+ (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 2) (const_int 10)
(const_int 3) (const_int 11)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
- "TARGET_AVX2"
- "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
@@ -9504,38 +10979,38 @@
(set_attr "mode" "XI")])
-(define_insn "vec_interleave_highv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_highv4si<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,v")
(vec_select:V4SI
(vec_concat:V8SI
- (match_operand:V4SI 1 "register_operand" "0,x")
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V4SI 1 "register_operand" "0,v")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpckhdq\t{%2, %0|%0, %2}
- vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
+ vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx2_interleave_lowv8si"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+(define_insn "avx2_interleave_lowv8si<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(vec_select:V8SI
(vec_concat:V16SI
- (match_operand:V8SI 1 "register_operand" "x")
- (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SI 1 "register_operand" "v")
+ (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 4) (const_int 12)
(const_int 5) (const_int 13)])))]
- "TARGET_AVX2"
- "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
@@ -9558,18 +11033,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "vec_interleave_lowv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+(define_insn "vec_interleave_lowv4si<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,v")
(vec_select:V4SI
(vec_concat:V8SI
- (match_operand:V4SI 1 "register_operand" "0,x")
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
+ (match_operand:V4SI 1 "register_operand" "0,v")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
"@
punpckldq\t{%2, %0|%0, %2}
- vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
+ vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_data16" "1,*")
@@ -9678,80 +11153,64 @@
(set_attr "prefix" "orig,orig,vex,vex")
(set_attr "mode" "TI")])
-(define_expand "avx512f_vinsert<shuffletype>32x4_mask"
- [(match_operand:V16FI 0 "register_operand")
- (match_operand:V16FI 1 "register_operand")
+(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
+ [(match_operand:AVX512_VEC 0 "register_operand")
+ (match_operand:AVX512_VEC 1 "register_operand")
(match_operand:<ssequartermode> 2 "nonimmediate_operand")
(match_operand:SI 3 "const_0_to_3_operand")
- (match_operand:V16FI 4 "register_operand")
+ (match_operand:AVX512_VEC 4 "register_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand")]
"TARGET_AVX512F"
{
- switch (INTVAL (operands[3]))
- {
- case 0:
- emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
- operands[1], operands[2], GEN_INT (0xFFF), operands[4],
- operands[5]));
- break;
- case 1:
- emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
- operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
- operands[5]));
- break;
- case 2:
- emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
- operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
- operands[5]));
- break;
- case 3:
- emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
- operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
- operands[5]));
- break;
- default:
- gcc_unreachable ();
- }
+  int mask, selector;
+  mask = INTVAL (operands[3]);
+  selector = (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
+	      ? 0xFFFF ^ (0xF000 >> mask * 4)
+	      : 0xFF ^ (0xC0 >> mask * 2));
+ emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
+ (operands[0], operands[1], operands[2], GEN_INT (selector),
+ operands[4], operands[5]));
DONE;
-
})
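+;; A worked example of the selector encoding above (illustrative): for
+;; a 16-element mode (4-byte elements) and insert position 2,
+;;
+;;   selector = 0xFFFF ^ (0xF000 >> 2 * 4) == 0xFFFF ^ 0x00F0 == 0xFF0F
+;;
+;; i.e. keep all elements of operand 1 except the third 128-bit group,
+;; which the "_1" insn below decodes back to the immediate 2.  For an
+;; 8-element mode the two-bit-per-group form gives 0xFF ^ (0xC0 >> 4)
+;; == 0xF3 for the same position.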
-(define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
- [(set (match_operand:V16FI 0 "register_operand" "=v")
- (vec_merge:V16FI
- (match_operand:V16FI 1 "register_operand" "v")
- (vec_duplicate:V16FI
+(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
+ [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
+ (vec_merge:AVX512_VEC
+ (match_operand:AVX512_VEC 1 "register_operand" "v")
+ (vec_duplicate:AVX512_VEC
(match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
(match_operand:SI 3 "const_int_operand" "n")))]
"TARGET_AVX512F"
{
int mask;
- if (INTVAL (operands[3]) == 0xFFF)
- mask = 0;
- else if ( INTVAL (operands[3]) == 0xF0FF)
- mask = 1;
- else if ( INTVAL (operands[3]) == 0xFF0F)
- mask = 2;
- else if ( INTVAL (operands[3]) == 0xFFF0)
- mask = 3;
+ int selector = INTVAL (operands[3]);
+
+ if (selector == 0xFFF || selector == 0x3F)
+ mask = 0;
+  else if (selector == 0xF0FF || selector == 0xCF)
+    mask = 1;
+  else if (selector == 0xFF0F || selector == 0xF3)
+    mask = 2;
+  else if (selector == 0xFFF0 || selector == 0xFC)
+ mask = 3;
else
gcc_unreachable ();
operands[3] = GEN_INT (mask);
- return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
+ return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
}
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_vinsert<shuffletype>64x4_mask"
- [(match_operand:V8FI 0 "register_operand")
- (match_operand:V8FI 1 "register_operand")
+(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
+ [(match_operand:AVX512_VEC_2 0 "register_operand")
+ (match_operand:AVX512_VEC_2 1 "register_operand")
(match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
(match_operand:SI 3 "const_0_to_1_operand")
- (match_operand:V8FI 4 "register_operand")
+ (match_operand:AVX512_VEC_2 4 "register_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand")]
"TARGET_AVX512F"
{
@@ -9768,6 +11227,40 @@
})
(define_insn "vec_set_lo_<mode><mask_name>"
+ [(set (match_operand:V16FI 0 "register_operand" "=v")
+ (vec_concat:V16FI
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "register_operand" "v")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512DQ"
+ "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_set_hi_<mode><mask_name>"
+ [(set (match_operand:V16FI 0 "register_operand" "=v")
+ (vec_concat:V16FI
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V16FI 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX512DQ"
+ "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_set_lo_<mode><mask_name>"
[(set (match_operand:V8FI 0 "register_operand" "=v")
(vec_concat:V8FI
(match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
@@ -9797,6 +11290,51 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
+(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
+ [(match_operand:VI8F_256 0 "register_operand")
+ (match_operand:VI8F_256 1 "register_operand")
+ (match_operand:VI8F_256 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_3_operand")
+ (match_operand:VI8F_256 4 "register_operand")
+ (match_operand:QI 5 "register_operand")]
+ "TARGET_AVX512DQ"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
+ (operands[0], operands[1], operands[2],
+ GEN_INT (((mask >> 0) & 1) * 2 + 0),
+ GEN_INT (((mask >> 0) & 1) * 2 + 1),
+ GEN_INT (((mask >> 1) & 1) * 2 + 4),
+ GEN_INT (((mask >> 1) & 1) * 2 + 5),
+ operands[4], operands[5]));
+ DONE;
+})
+
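+;; Illustrative decode of the immediate above: bit 0 of the imm selects
+;; which 128-bit half of operand 1 forms the low half of the result,
+;; bit 1 which half of operand 2 forms the high half.  E.g. for
+;; mask == 2 (binary 10) the emitted parallel is [0 1 6 7]:
+;;
+;;   sel[0] = ((2 >> 0) & 1) * 2 + 0 == 0
+;;   sel[1] = ((2 >> 0) & 1) * 2 + 1 == 1
+;;   sel[2] = ((2 >> 1) & 1) * 2 + 4 == 6
+;;   sel[3] = ((2 >> 1) & 1) * 2 + 5 == 7
+;;
+;; The avx512vl_shuf_<shuffletype>32x4 expander further below applies
+;; the same two bits to 4-element (128-bit) groups of dwords/floats.
+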
+(define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
+ [(set (match_operand:VI8F_256 0 "register_operand" "=v")
+ (vec_select:VI8F_256
+ (vec_concat:<ssedoublemode>
+ (match_operand:VI8F_256 1 "register_operand" "v")
+ (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")
+ (match_operand 5 "const_4_to_7_operand")
+ (match_operand 6 "const_4_to_7_operand")])))]
+ "TARGET_AVX512VL
+ && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
+ && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
+{
+ int mask;
+ mask = INTVAL (operands[3]) / 2;
+ mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
+ operands[3] = GEN_INT (mask);
+ return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
[(match_operand:V8FI 0 "register_operand")
(match_operand:V8FI 1 "register_operand")
@@ -9855,6 +11393,64 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
+ [(match_operand:VI4F_256 0 "register_operand")
+ (match_operand:VI4F_256 1 "register_operand")
+ (match_operand:VI4F_256 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_3_operand")
+ (match_operand:VI4F_256 4 "register_operand")
+ (match_operand:QI 5 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
+ (operands[0], operands[1], operands[2],
+ GEN_INT (((mask >> 0) & 1) * 4 + 0),
+ GEN_INT (((mask >> 0) & 1) * 4 + 1),
+ GEN_INT (((mask >> 0) & 1) * 4 + 2),
+ GEN_INT (((mask >> 0) & 1) * 4 + 3),
+ GEN_INT (((mask >> 1) & 1) * 4 + 8),
+ GEN_INT (((mask >> 1) & 1) * 4 + 9),
+ GEN_INT (((mask >> 1) & 1) * 4 + 10),
+ GEN_INT (((mask >> 1) & 1) * 4 + 11),
+ operands[4], operands[5]));
+ DONE;
+})
+
+(define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
+ [(set (match_operand:VI4F_256 0 "register_operand" "=v")
+ (vec_select:VI4F_256
+ (vec_concat:<ssedoublemode>
+ (match_operand:VI4F_256 1 "register_operand" "v")
+ (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
+ (parallel [(match_operand 3 "const_0_to_7_operand")
+ (match_operand 4 "const_0_to_7_operand")
+ (match_operand 5 "const_0_to_7_operand")
+ (match_operand 6 "const_0_to_7_operand")
+ (match_operand 7 "const_8_to_15_operand")
+ (match_operand 8 "const_8_to_15_operand")
+ (match_operand 9 "const_8_to_15_operand")
+ (match_operand 10 "const_8_to_15_operand")])))]
+ "TARGET_AVX512VL
+ && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
+ && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
+ && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
+ && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
+ && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
+ && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
+{
+ int mask;
+ mask = INTVAL (operands[3]) / 4;
+ mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
[(match_operand:V16FI 0 "register_operand")
(match_operand:V16FI 1 "register_operand")
@@ -10015,6 +11611,28 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "XI")])
+(define_expand "avx512vl_pshufdv3_mask"
+ [(match_operand:V8SI 0 "register_operand")
+ (match_operand:V8SI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V8SI 3 "register_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4),
+ operands[3], operands[4]));
+ DONE;
+})
+
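+;; Illustrative note: each 2-bit field of the pshufd immediate selects
+;; a source dword for the corresponding destination dword; the 256-bit
+;; form repeats the pattern in the upper lane (hence the "+ 4" indices
+;; above), and the pshuflw/pshufhw expanders below decode the same
+;; fields for the low/high words of each lane.  A C sketch of the
+;; 128-bit scalar semantics (hypothetical helper, not part of the
+;; port):
+;;
+;;   static void
+;;   pshufd (int dst[4], const int src[4], int imm)
+;;   {
+;;     for (int i = 0; i < 4; i++)
+;;       dst[i] = src[(imm >> (2 * i)) & 3];
+;;   }
+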
(define_expand "avx2_pshufdv3"
[(match_operand:V8SI 0 "register_operand")
(match_operand:V8SI 1 "nonimmediate_operand")
@@ -10034,10 +11652,10 @@
DONE;
})
-(define_insn "avx2_pshufd_1"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+(define_insn "avx2_pshufd_1<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(vec_select:V8SI
- (match_operand:V8SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 1 "nonimmediate_operand" "vm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -10050,7 +11668,8 @@
&& INTVAL (operands[2]) + 4 == INTVAL (operands[6])
&& INTVAL (operands[3]) + 4 == INTVAL (operands[7])
&& INTVAL (operands[4]) + 4 == INTVAL (operands[8])
- && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
+ && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
+ && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -10059,13 +11678,31 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
+ return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
[(set_attr "type" "sselog1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "length_immediate" "1")
(set_attr "mode" "OI")])
+(define_expand "avx512vl_pshufd_mask"
+ [(match_operand:V4SI 0 "register_operand")
+ (match_operand:V4SI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V4SI 3 "register_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "sse2_pshufd"
[(match_operand:V4SI 0 "register_operand")
(match_operand:V4SI 1 "nonimmediate_operand")
@@ -10081,15 +11718,15 @@
DONE;
})
-(define_insn "sse2_pshufd_1"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
+(define_insn "sse2_pshufd_1<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_select:V4SI
- (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SI 1 "nonimmediate_operand" "vm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
(match_operand 5 "const_0_to_3_operand")])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -10098,14 +11735,48 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
+ return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
- (set_attr "prefix" "maybe_vex")
+ (set_attr "prefix" "<mask_prefix2>")
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
+(define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (unspec:V32HI
+ [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
+ (match_operand:SI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_PSHUFLW))]
+ "TARGET_AVX512BW"
+ "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_expand "avx512vl_pshuflwv3_mask"
+ [(match_operand:V16HI 0 "register_operand")
+ (match_operand:V16HI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V16HI 3 "register_operand")
+ (match_operand:HI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ GEN_INT (((mask >> 0) & 3) + 8),
+ GEN_INT (((mask >> 2) & 3) + 8),
+ GEN_INT (((mask >> 4) & 3) + 8),
+ GEN_INT (((mask >> 6) & 3) + 8),
+ operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "avx2_pshuflwv3"
[(match_operand:V16HI 0 "register_operand")
(match_operand:V16HI 1 "nonimmediate_operand")
@@ -10125,10 +11796,10 @@
DONE;
})
-(define_insn "avx2_pshuflw_1"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+(define_insn "avx2_pshuflw_1<mask_name>"
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(vec_select:V16HI
- (match_operand:V16HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16HI 1 "nonimmediate_operand" "vm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -10149,7 +11820,8 @@
&& INTVAL (operands[2]) + 8 == INTVAL (operands[6])
&& INTVAL (operands[3]) + 8 == INTVAL (operands[7])
&& INTVAL (operands[4]) + 8 == INTVAL (operands[8])
- && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
+ && INTVAL (operands[5]) + 8 == INTVAL (operands[9])
+ && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -10158,13 +11830,31 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+ return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "length_immediate" "1")
(set_attr "mode" "OI")])
+(define_expand "avx512vl_pshuflw_mask"
+ [(match_operand:V8HI 0 "register_operand")
+ (match_operand:V8HI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V8HI 3 "register_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "sse2_pshuflw"
[(match_operand:V8HI 0 "register_operand")
(match_operand:V8HI 1 "nonimmediate_operand")
@@ -10180,10 +11870,10 @@
DONE;
})
-(define_insn "sse2_pshuflw_1"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
+(define_insn "sse2_pshuflw_1<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
(vec_select:V8HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -10192,7 +11882,7 @@
(const_int 5)
(const_int 6)
(const_int 7)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -10201,7 +11891,7 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+ return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_data16" "0")
@@ -10229,10 +11919,44 @@
DONE;
})
-(define_insn "avx2_pshufhw_1"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+(define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (unspec:V32HI
+ [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
+ (match_operand:SI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_PSHUFHW))]
+ "TARGET_AVX512BW"
+ "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_expand "avx512vl_pshufhwv3_mask"
+ [(match_operand:V16HI 0 "register_operand")
+ (match_operand:V16HI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V16HI 3 "register_operand")
+ (match_operand:HI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4),
+ GEN_INT (((mask >> 0) & 3) + 12),
+ GEN_INT (((mask >> 2) & 3) + 12),
+ GEN_INT (((mask >> 4) & 3) + 12),
+ GEN_INT (((mask >> 6) & 3) + 12),
+ operands[3], operands[4]));
+ DONE;
+})
+
+(define_insn "avx2_pshufhw_1<mask_name>"
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(vec_select:V16HI
- (match_operand:V16HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16HI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -10253,7 +11977,8 @@
&& INTVAL (operands[2]) + 8 == INTVAL (operands[6])
&& INTVAL (operands[3]) + 8 == INTVAL (operands[7])
&& INTVAL (operands[4]) + 8 == INTVAL (operands[8])
- && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
+ && INTVAL (operands[5]) + 8 == INTVAL (operands[9])
+ && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= (INTVAL (operands[2]) - 4) << 0;
@@ -10262,13 +11987,31 @@
mask |= (INTVAL (operands[5]) - 4) << 6;
operands[2] = GEN_INT (mask);
- return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
+ return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "length_immediate" "1")
(set_attr "mode" "OI")])
+(define_expand "avx512vl_pshufhw_mask"
+ [(match_operand:V8HI 0 "register_operand")
+ (match_operand:V8HI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V8HI 3 "register_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4),
+ operands[3], operands[4]));
+ DONE;
+})
+
(define_expand "sse2_pshufhw"
[(match_operand:V8HI 0 "register_operand")
(match_operand:V8HI 1 "nonimmediate_operand")
@@ -10284,10 +12027,10 @@
DONE;
})
-(define_insn "sse2_pshufhw_1"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
+(define_insn "sse2_pshufhw_1<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
(vec_select:V8HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0)
(const_int 1)
(const_int 2)
@@ -10296,7 +12039,7 @@
(match_operand 3 "const_4_to_7_operand")
(match_operand 4 "const_4_to_7_operand")
(match_operand 5 "const_4_to_7_operand")])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= (INTVAL (operands[2]) - 4) << 0;
@@ -10305,7 +12048,7 @@
mask |= (INTVAL (operands[5]) - 4) << 6;
operands[2] = GEN_INT (mask);
- return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
+ return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_rep" "1")
@@ -10717,7 +12460,7 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_expand "<sse2_avx2>_uavg<mode>3"
+(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
[(set (match_operand:VI12_AVX2 0 "register_operand")
(truncate:VI12_AVX2
(lshiftrt:<ssedoublemode>
@@ -10727,43 +12470,54 @@
(match_operand:VI12_AVX2 1 "nonimmediate_operand"))
(zero_extend:<ssedoublemode>
(match_operand:VI12_AVX2 2 "nonimmediate_operand")))
- (match_dup 3))
+ (match_dup <mask_expand_op3>))
(const_int 1))))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
+ rtx tmp;
+ if (<mask_applied>)
+ tmp = operands[3];
operands[3] = CONST1_RTX(<MODE>mode);
ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
+
+ if (<mask_applied>)
+ {
+ operands[5] = operands[3];
+ operands[3] = tmp;
+ }
})
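+;; The masked operation is the usual rounding average; a worked example
+;; of the formula the RTL above implements (illustrative):
+;;
+;;   dst = (a + b + 1) >> 1, computed in double width, e.g. for pavgb
+;;   a == 250, b == 251  ->  (250 + 251 + 1) >> 1 == 251
+;;
+;; The zero_extends provide the widening, the const1 operand the
+;; rounding "+ 1", and the truncating lshiftrt the final shift.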
-(define_insn "*<sse2_avx2>_uavg<mode>3"
- [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
+(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
+ [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
(truncate:VI12_AVX2
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(plus:<ssedoublemode>
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
+ (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
(zero_extend:<ssedoublemode>
- (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
- (match_operand:VI12_AVX2 3 "const1_operand"))
+ (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
+ (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
(const_int 1))))]
- "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "TARGET_SSE2
+ && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
pavg<ssemodesuffix>\t{%2, %0|%0, %2}
- vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseiadd")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
(define_insn "<sse2_avx2>_psadbw"
- [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
- (unspec:VI8_AVX2
- [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
- (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
+ [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
+ (unspec:VI8_AVX2_AVX512BW
+ [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
+ (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
UNSPEC_PSADBW))]
"TARGET_SSE2"
"@
@@ -10773,7 +12527,7 @@
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
(set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
@@ -11208,6 +12962,54 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+;; Unspec version for intrinsics.
+(define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=v")
+ (unspec:VI2_AVX2
+ [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
+ (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
+ UNSPEC_PMADDUBSW512))]
+ "TARGET_AVX512BW && <mask_mode512bit_condition>"
+ "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "avx512bw_umulhrswv32hi3<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (truncate:V32HI
+ (lshiftrt:V32SI
+ (plus:V32SI
+ (lshiftrt:V32SI
+ (mult:V32SI
+ (sign_extend:V32SI
+ (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
+ (sign_extend:V32SI
+ (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
+ (const_int 14))
+ (const_vector:V32HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX512BW"
+ "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
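+;; A worked example of the vpmulhrsw rounding formula above
+;; (illustrative): for 16-bit elements a and b,
+;;
+;;   dst = (((a * b) >> 14) + 1) >> 1
+;;
+;; e.g. a == 0x4000 (0.5 in Q15) and b == 0x2000 (0.25) gives
+;; (0x08000000 >> 14) + 1 == 0x2001, then >> 1 == 0x1000 (0.125).
+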
(define_insn "ssse3_pmaddubsw128"
[(set (match_operand:V8HI 0 "register_operand" "=x,x")
(ss_plus:V8HI
@@ -11285,6 +13087,29 @@
(define_mode_iterator PMULHRSW
[V4HI V8HI (V16HI "TARGET_AVX2")])
+(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
+ [(set (match_operand:PMULHRSW 0 "register_operand")
+ (vec_merge:PMULHRSW
+ (truncate:PMULHRSW
+ (lshiftrt:<ssedoublemode>
+ (plus:<ssedoublemode>
+ (lshiftrt:<ssedoublemode>
+ (mult:<ssedoublemode>
+ (sign_extend:<ssedoublemode>
+ (match_operand:PMULHRSW 1 "nonimmediate_operand"))
+ (sign_extend:<ssedoublemode>
+ (match_operand:PMULHRSW 2 "nonimmediate_operand")))
+ (const_int 14))
+ (match_dup 5))
+ (const_int 1)))
+ (match_operand:PMULHRSW 3 "register_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")))]
+ "TARGET_AVX512BW && TARGET_AVX512VL"
+{
+ operands[5] = CONST1_RTX(<MODE>mode);
+ ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
+})
+
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
[(set (match_operand:PMULHRSW 0 "register_operand")
(truncate:PMULHRSW
@@ -11305,29 +13130,31 @@
ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
-(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
+(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
+ (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
+ (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
(const_int 14))
(match_operand:VI2_AVX2 3 "const1_operand"))
(const_int 1))))]
- "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "TARGET_SSSE3
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
+ && <mask_mode512bit_condition>"
"@
pmulhrsw\t{%2, %0|%0, %2}
- vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+ vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseimul")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*ssse3_pmulhrswv4hi3"
@@ -11351,21 +13178,21 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
-(define_insn "<ssse3_avx2>_pshufb<mode>3"
- [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
+(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,v")
(unspec:VI1_AVX2
- [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
- (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
+ [(match_operand:VI1_AVX2 1 "register_operand" "0,v")
+ (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,vm")]
UNSPEC_PSHUFB))]
- "TARGET_SSSE3"
+ "TARGET_SSSE3 && <mask_mode512bit_condition>"
"@
pshufb\t{%2, %0|%0, %2}
- vpshufb\t{%2, %1, %0|%0, %1, %2}"
+ vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "btver2_decode" "vector,vector")
(set_attr "mode" "<sseinsnmode>")])
@@ -11411,11 +13238,33 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
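+;; The palignr immediate is a bit count in the RTL (hence the
+;; const_0_to_255_mul_8 predicates) but a byte count in the
+;; instruction, so the output code divides it by 8.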
+(define_insn "<ssse3_avx2>_palignr<mode>_mask"
+ [(set (match_operand:VI1_AVX2 0 "register_operand" "=v")
+ (vec_merge:VI1_AVX2
+ (unspec:VI1_AVX2
+ [(match_operand:VI1_AVX2 1 "register_operand" "v")
+ (match_operand:VI1_AVX2 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR)
+ (match_operand:VI1_AVX2 4 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
+ "TARGET_AVX512BW"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "atom_unit" "sishuf")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "<ssse3_avx2>_palignr<mode>"
- [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
+ [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
(unspec:SSESCALARMODE
- [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
- (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
+ [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
+ (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
(match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
UNSPEC_PALIGNR))]
"TARGET_SSSE3"
@@ -11460,10 +13309,10 @@
(set_attr "mode" "DI")])
(define_insn "<mask_codefor>abs<mode>2<mask_name>"
- [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
- (abs:VI124_AVX2_48_AVX512F
- (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
- "TARGET_SSSE3 && <mask_mode512bit_condition>"
+ [(set (match_operand:VI_AVX2 0 "register_operand" "=v")
+ (abs:VI_AVX2
+ (match_operand:VI_AVX2 1 "nonimmediate_operand" "vm")))]
+ "TARGET_SSSE3 && <mask_mode512bit_condition> && (<MODE>mode != V2DImode || TARGET_AVX)"
"%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
@@ -11471,11 +13320,12 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<sseinsnmode>")])
+;; TODO: not in patch; check this iterator (v).
(define_expand "abs<mode>2"
- [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
- (abs:VI124_AVX2_48_AVX512F
- (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
- "TARGET_SSE2"
+ [(set (match_operand:VI_AVX2 0 "register_operand")
+ (abs:VI_AVX2
+ (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
+ "TARGET_SSE2 && (<MODE>mode != V2DImode || TARGET_AVX)"
{
if (!TARGET_SSSE3)
{
@@ -11669,36 +13519,22 @@
(set_attr "btver2_decode" "vector,vector")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx2_packusdw"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
- (vec_concat:V16HI
- (us_truncate:V8HI
- (match_operand:V8SI 1 "register_operand" "x"))
- (us_truncate:V8HI
- (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
- "TARGET_AVX2"
- "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "OI")])
-
-(define_insn "sse4_1_packusdw"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
- (vec_concat:V8HI
- (us_truncate:V4HI
- (match_operand:V4SI 1 "register_operand" "0,x"))
- (us_truncate:V4HI
- (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
- "TARGET_SSE4_1"
+(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ (vec_concat:VI2_AVX2
+ (us_truncate:<ssehalfvecmode>
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
+ (us_truncate:<ssehalfvecmode>
+ (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
"@
packusdw\t{%2, %0|%0, %2}
- vpackusdw\t{%2, %1, %0|%0, %1, %2}"
+ vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
- (set_attr "mode" "TI")])
+ (set_attr "prefix" "orig,maybe_evex")
+ (set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_pblendvb"
[(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
@@ -11791,28 +13627,39 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx2_<code>v16qiv16hi2"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(any_extend:V16HI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2"
- "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v8qiv8hi2"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
+(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
+ [(set (match_operand:V32HI 0 "register_operand" "=v")
+ (any_extend:V32HI
+ (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512BW"
+ "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
(any_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
@@ -11829,31 +13676,31 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_<code>v8qiv8si2"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v8qiv8si2<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(any_extend:V8SI
(vec_select:V8QI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX2"
- "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v4qiv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
+(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
(any_extend:V4SI
(vec_select:V4QI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "32")
(set_attr "prefix_extra" "1")
@@ -11870,26 +13717,26 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_<code>v8hiv8si2"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v8hiv8si2<mask_name>"
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(any_extend:V8SI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2"
- "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v4hiv4si2"
- [(set (match_operand:V4SI 0 "register_operand" "=x")
+(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
(any_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
@@ -11911,28 +13758,28 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_<code>v4qiv4di2"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v4qiv4di2<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(any_extend:V4DI
(vec_select:V4QI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
- "TARGET_AVX2"
- "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v2qiv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
+(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
(any_extend:V2DI
(vec_select:V2QI
- (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "16")
(set_attr "prefix_extra" "1")
@@ -11949,28 +13796,28 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_<code>v4hiv4di2"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v4hiv4di2<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(any_extend:V4DI
(vec_select:V4HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
- "TARGET_AVX2"
- "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v2hiv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
+(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
(any_extend:V2DI
(vec_select:V2HI
- (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "32")
(set_attr "prefix_extra" "1")
@@ -11987,24 +13834,25 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_<code>v4siv4di2"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
+(define_insn "avx2_<code>v4siv4di2<mask_name>"
+ [(set (match_operand:V4DI 0 "register_operand" "=v")
(any_extend:V4DI
- (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2"
- "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
+ (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_evex")
(set_attr "prefix_extra" "1")
(set_attr "mode" "OI")])
-(define_insn "sse4_1_<code>v2siv2di2"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
+(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
(any_extend:V2DI
(vec_select:V2SI
- (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (match_operand:V4SI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
- "TARGET_SSE4_1"
- "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
+ "TARGET_SSE4_1 && <mask_mode512bit_condition>"
+ "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "ssememalign" "64")
(set_attr "prefix_extra" "1")
@@ -13556,17 +15404,20 @@
(match_operand:VI48_256 2 "nonimmediate_operand")))]
"TARGET_AVX2")
-(define_expand "vashr<mode>3"
+(define_expand "vashr<mode>3<mask_name>"
[(set (match_operand:VI128_128 0 "register_operand")
(ashiftrt:VI128_128
(match_operand:VI128_128 1 "register_operand")
(match_operand:VI128_128 2 "nonimmediate_operand")))]
- "TARGET_XOP"
+ "TARGET_XOP || ((TARGET_AVX512BW || <MODE>mode == V2DImode) && TARGET_AVX512VL)"
{
- rtx neg = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_neg<mode>2 (neg, operands[2]));
- emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
- DONE;
+ if (!((TARGET_AVX512BW || <MODE>mode == V2DImode) && TARGET_AVX512VL))
+ {
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
+ DONE;
+ }
})
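+;; When the AVX512VL forms are available, the expander above falls
+;; through without DONE and the ashiftrt pattern is matched directly;
+;; otherwise the arithmetic shift is synthesized from XOP's vpsha,
+;; which shifts right when given a negated count.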
(define_expand "vashrv4si3"
@@ -13713,34 +15564,37 @@
DONE;
})
-(define_expand "ashrv2di3"
+(define_expand "ashrv2di3<mask_name>"
[(set (match_operand:V2DI 0 "register_operand")
(ashiftrt:V2DI
(match_operand:V2DI 1 "register_operand")
(match_operand:DI 2 "nonmemory_operand")))]
- "TARGET_XOP"
+ "TARGET_XOP || TARGET_AVX512VL"
{
rtx reg = gen_reg_rtx (V2DImode);
rtx par;
bool negate = false;
int i;
- if (CONST_INT_P (operands[2]))
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- else
- negate = true;
+ if (!TARGET_AVX512VL)
+ {
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ else
+ negate = true;
- par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
- for (i = 0; i < 2; i++)
- XVECEXP (par, 0, i) = operands[2];
+ par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
+ for (i = 0; i < 2; i++)
+ XVECEXP (par, 0, i) = operands[2];
- emit_insn (gen_vec_initv2di (reg, par));
+ emit_insn (gen_vec_initv2di (reg, par));
- if (negate)
- emit_insn (gen_negv2di2 (reg, reg));
+ if (negate)
+ emit_insn (gen_negv2di2 (reg, reg));
- emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
- DONE;
+ emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
+ DONE;
+ }
})
;; XOP FRCZ support
@@ -14039,10 +15893,10 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
- [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
- (unspec:VI48F_256_512
- [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
+(define_insn "<avx2_avx512bw>_permvar<mode><mask_name>"
+ [(set (match_operand:VI48F_256_512_2I 0 "register_operand" "=v")
+ (unspec:VI48F_256_512_2I
+ [(match_operand:VI48F_256_512_2I 1 "nonimmediate_operand" "vm")
(match_operand:<sseintvecmode> 2 "register_operand" "v")]
UNSPEC_VPERMVAR))]
"TARGET_AVX2 && <mask_mode512bit_condition>"
@@ -14051,14 +15905,14 @@
(set_attr "prefix" "<mask_prefix2>")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "<avx2_avx512f>_perm<mode>"
+(define_expand "<avx2_avx512bw>_perm<mode>"
[(match_operand:VI8F_256_512 0 "register_operand")
(match_operand:VI8F_256_512 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")]
"TARGET_AVX2"
{
int mask = INTVAL (operands[2]);
- emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
+ emit_insn (gen_<avx2_avx512bw>_perm<mode>_1 (operands[0], operands[1],
GEN_INT ((mask >> 0) & 3),
GEN_INT ((mask >> 2) & 3),
GEN_INT ((mask >> 4) & 3),
@@ -14066,16 +15920,16 @@
DONE;
})
-(define_expand "avx512f_perm<mode>_mask"
- [(match_operand:V8FI 0 "register_operand")
- (match_operand:V8FI 1 "nonimmediate_operand")
+(define_expand "<avx512>_perm<mode>_mask"
+ [(match_operand:VI8F_256_512 0 "register_operand")
+ (match_operand:VI8F_256_512 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")
- (match_operand:V8FI 3 "vector_move_operand")
+ (match_operand:VI8F_256_512 3 "vector_move_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512F"
{
int mask = INTVAL (operands[2]);
- emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
+ emit_insn (gen_<avx2_avx512bw>_perm<mode>_1_mask (operands[0], operands[1],
GEN_INT ((mask >> 0) & 3),
GEN_INT ((mask >> 2) & 3),
GEN_INT ((mask >> 4) & 3),
@@ -14084,7 +15938,7 @@
DONE;
})
-(define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
+(define_insn "<avx2_avx512bw>_perm<mode>_1<mask_name>"
[(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
(vec_select:VI8F_256_512
(match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
@@ -14136,9 +15990,9 @@
[V8SI V8SF V4DI V4DF])
(define_insn "vec_dup<mode>"
- [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
+ [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,v,x")
(vec_duplicate:AVX_VEC_DUP_MODE
- (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,v,?x")))]
"TARGET_AVX"
"@
vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
@@ -14146,13 +16000,13 @@
#"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "isa" "*,avx2,noavx2")
(set_attr "mode" "V8SF")])
-(define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (vec_duplicate:VI48F_512
+(define_insn "<avx512>_vec_dup<mode><mask_name>"
+ [(set (match_operand:V_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:V_AVX512VL
(vec_select:<ssescalarmode>
(match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0)]))))]
@@ -14186,19 +16040,24 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (vec_duplicate:VI48_512
+(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
+ [(set (match_operand:VI_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:VI_AVX512VL
(match_operand:<ssescalarmode> 1 "register_operand" "r")))]
- "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
- "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ "TARGET_AVX512F && (<ssescalarmode>mode != DImode || TARGET_64BIT)"
+{
+  /* Broadcasts from a QImode or HImode GPR must be written with the
+     SImode name of the register.  */
+ if (GET_MODE (operands[1]) == QImode || GET_MODE (operands[1]) == HImode)
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
+ return "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (vec_duplicate:VI48F_512
+(define_insn "<mask_codefor><avx512>_vec_dup_mem<mode><mask_name>"
+ [(set (match_operand:V_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:V_AVX512VL
(match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
"v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -14245,9 +16104,84 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512cd_maskb_vec_dupv8di"
- [(set (match_operand:V8DI 0 "register_operand" "=v")
- (vec_duplicate:V8DI
+;; For broadcast[i|f]32x2.  Note there is no V4SF variant, only V4SI.
+(define_mode_iterator VI4F_BRCST32x2
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL")])
+
+(define_mode_attr 64x2_mode
+ [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
+
+(define_mode_attr 32x2mode
+ [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
+ (V8SF "V2SF") (V4SI "V2SI")])
+
+(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
+ [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
+ (vec_duplicate:VI4F_BRCST32x2
+ (vec_select:<32x2mode>
+ (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_AVX512DQ"
+ "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
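+;; In the register alternatives below, %t1 and %g1 print the 256-bit
+;; (ymm) and 512-bit (zmm) names of operand 1, so the broadcast can
+;; be written as a vshuf of the source with itself.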
+(define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
+ [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
+ (vec_duplicate:VI4F_256
+ (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
+ "TARGET_AVX512VL"
+ "@
+ vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
+ vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
+ [(set (match_operand:V16FI 0 "register_operand" "=v,v")
+ (vec_duplicate:V16FI
+ (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
+ "TARGET_AVX512DQ"
+ "@
+ vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
+ vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
+ [(set (match_operand:VI8F_256_512 0 "register_operand" "=v,v")
+ (vec_duplicate:VI8F_256_512
+ (match_operand:<64x2_mode> 1 "nonimmediate_operand" "v,m")))]
+ "TARGET_AVX512DQ"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (GET_MODE_SIZE (<MODE>mode) == 64)
+ return "vshuf<shuffletype>64x2\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}";
+ else
+ return "vshuf<shuffletype>64x2\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}";
+ case 1:
+ return "vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512cd_maskb_vec_dup<mode>"
+ [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:VI8_AVX512VL
(zero_extend:DI
(match_operand:QI 1 "register_operand" "Yk"))))]
"TARGET_AVX512CD"
@@ -14256,9 +16190,9 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx512cd_maskw_vec_dupv16si"
- [(set (match_operand:V16SI 0 "register_operand" "=v")
- (vec_duplicate:V16SI
+(define_insn "avx512cd_maskw_vec_dup<mode>"
+ [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
+ (vec_duplicate:VI4_AVX512VL
(zero_extend:SI
(match_operand:HI 1 "register_operand" "Yk"))))]
"TARGET_AVX512CD"
@@ -14418,26 +16352,26 @@
(set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_vpermi2var<mode>3_maskz"
- [(match_operand:VI48F_512 0 "register_operand" "=v")
- (match_operand:VI48F_512 1 "register_operand" "v")
+(define_expand "<avx512>_vpermi2var<mode>3_maskz"
+ [(match_operand:VI248F 0 "register_operand" "=v")
+ (match_operand:VI248F 1 "register_operand" "v")
(match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
"TARGET_AVX512F"
{
- emit_insn (gen_avx512f_vpermi2var<mode>3_maskz_1 (
+ emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
operands[0], operands[1], operands[2], operands[3],
CONST0_RTX (<MODE>mode), operands[4]));
DONE;
})
-(define_insn "avx512f_vpermi2var<mode>3<sd_maskz_name>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand" "v")
+(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
+ [(set (match_operand:VI248F 0 "register_operand" "=v")
+ (unspec:VI248F
+ [(match_operand:VI248F 1 "register_operand" "v")
(match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMI2))]
"TARGET_AVX512F"
"vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
@@ -14445,13 +16379,13 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vpermi2var<mode>3_mask"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (vec_merge:VI48F_512
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand" "v")
+(define_insn "<avx512>_vpermi2var<mode>3_mask"
+ [(set (match_operand:VI248F 0 "register_operand" "=v")
+ (vec_merge:VI248F
+ (unspec:VI248F
+ [(match_operand:VI248F 1 "register_operand" "v")
(match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMI2_MASK)
(match_dup 0)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -14461,26 +16395,26 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_vpermt2var<mode>3_maskz"
- [(match_operand:VI48F_512 0 "register_operand" "=v")
+(define_expand "<avx512>_vpermt2var<mode>3_maskz"
+ [(match_operand:VI248F 0 "register_operand" "=v")
(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
+ (match_operand:VI248F 2 "register_operand" "0")
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
"TARGET_AVX512F"
{
- emit_insn (gen_avx512f_vpermt2var<mode>3_maskz_1 (
+ emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
operands[0], operands[1], operands[2], operands[3],
CONST0_RTX (<MODE>mode), operands[4]));
DONE;
})
-(define_insn "avx512f_vpermt2var<mode>3<sd_maskz_name>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (unspec:VI48F_512
+(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
+ [(set (match_operand:VI248F 0 "register_operand" "=v")
+ (unspec:VI248F
[(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ (match_operand:VI248F 2 "register_operand" "0")
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMT2))]
"TARGET_AVX512F"
"vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
@@ -14488,13 +16422,13 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vpermt2var<mode>3_mask"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (vec_merge:VI48F_512
- (unspec:VI48F_512
+(define_insn "<avx512>_vpermt2var<mode>3_mask"
+ [(set (match_operand:VI248F 0 "register_operand" "=v")
+ (vec_merge:VI248F
+ (unspec:VI248F
[(match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "register_operand" "0")
- (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ (match_operand:VI248F 2 "register_operand" "0")
+ (match_operand:VI248F 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMT2)
(match_dup 2)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -14610,6 +16544,34 @@
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")])
+(define_expand "avx512vl_vinsert<mode>"
+ [(match_operand:VI48F_256 0 "register_operand")
+ (match_operand:VI48F_256 1 "register_operand")
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_1_operand")
+ (match_operand:VI48F_256 4 "register_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
+
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ insn = gen_vec_set_lo_<mode>_mask;
+ break;
+ case 1:
+ insn = gen_vec_set_hi_<mode>_mask;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
+ operands[5]));
+ DONE;
+})
+
(define_expand "avx_vinsertf128<mode>"
[(match_operand:V_256 0 "register_operand")
(match_operand:V_256 1 "register_operand")
@@ -14635,92 +16597,82 @@
DONE;
})
-(define_insn "avx2_vec_set_lo_v4di"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
- (vec_concat:V4DI
- (match_operand:V2DI 2 "nonimmediate_operand" "xm")
- (vec_select:V2DI
- (match_operand:V4DI 1 "register_operand" "x")
- (parallel [(const_int 2) (const_int 3)]))))]
- "TARGET_AVX2"
- "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "OI")])
-
-(define_insn "avx2_vec_set_hi_v4di"
- [(set (match_operand:V4DI 0 "register_operand" "=x")
- (vec_concat:V4DI
- (vec_select:V2DI
- (match_operand:V4DI 1 "register_operand" "x")
- (parallel [(const_int 0) (const_int 1)]))
- (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
- "TARGET_AVX2"
- "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
- [(set_attr "type" "sselog")
- (set_attr "prefix_extra" "1")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "OI")])
-
-(define_insn "vec_set_lo_<mode>"
- [(set (match_operand:VI8F_256 0 "register_operand" "=x")
+(define_insn "vec_set_lo_<mode><mask_name>"
+ [(set (match_operand:VI8F_256 0 "register_operand" "=v")
(vec_concat:VI8F_256
- (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
(vec_select:<ssehalfvecmode>
- (match_operand:VI8F_256 1 "register_operand" "x")
+ (match_operand:VI8F_256 1 "register_operand" "v")
(parallel [(const_int 2) (const_int 3)]))))]
"TARGET_AVX"
- "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+{
+ if (TARGET_AVX512VL)
+ return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
+ else
+ return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vec_set_hi_<mode>"
- [(set (match_operand:VI8F_256 0 "register_operand" "=x")
+(define_insn "vec_set_hi_<mode><mask_name>"
+ [(set (match_operand:VI8F_256 0 "register_operand" "=v")
(vec_concat:VI8F_256
(vec_select:<ssehalfvecmode>
- (match_operand:VI8F_256 1 "register_operand" "x")
+ (match_operand:VI8F_256 1 "register_operand" "v")
(parallel [(const_int 0) (const_int 1)]))
- (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX"
- "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+{
+ if (TARGET_AVX512VL)
+ return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
+ else
+ return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vec_set_lo_<mode>"
- [(set (match_operand:VI4F_256 0 "register_operand" "=x")
+(define_insn "vec_set_lo_<mode><mask_name>"
+ [(set (match_operand:VI4F_256 0 "register_operand" "=v")
(vec_concat:VI4F_256
- (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
(vec_select:<ssehalfvecmode>
- (match_operand:VI4F_256 1 "register_operand" "x")
+ (match_operand:VI4F_256 1 "register_operand" "v")
(parallel [(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
"TARGET_AVX"
- "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+{
+ if (TARGET_AVX512VL)
+ return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
+ else
+ return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vec_set_hi_<mode>"
- [(set (match_operand:VI4F_256 0 "register_operand" "=x")
+(define_insn "vec_set_hi_<mode><mask_name>"
+ [(set (match_operand:VI4F_256 0 "register_operand" "=v")
(vec_concat:VI4F_256
(vec_select:<ssehalfvecmode>
- (match_operand:VI4F_256 1 "register_operand" "x")
+ (match_operand:VI4F_256 1 "register_operand" "v")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))
- (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX"
- "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+{
+ if (TARGET_AVX512VL)
+ return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
+ else
+ return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
@@ -14883,7 +16835,7 @@
})
(define_expand "vec_init<mode>"
- [(match_operand:VI48F_512 0 "register_operand")
+ [(match_operand:VI48F_I12B_512 0 "register_operand")
(match_operand 1)]
"TARGET_AVX512F"
{
@@ -14891,71 +16843,22 @@
DONE;
})
-(define_expand "avx2_extracti128"
- [(match_operand:V2DI 0 "nonimmediate_operand")
- (match_operand:V4DI 1 "register_operand")
- (match_operand:SI 2 "const_0_to_1_operand")]
- "TARGET_AVX2"
-{
- rtx (*insn)(rtx, rtx);
-
- switch (INTVAL (operands[2]))
- {
- case 0:
- insn = gen_vec_extract_lo_v4di;
- break;
- case 1:
- insn = gen_vec_extract_hi_v4di;
- break;
- default:
- gcc_unreachable ();
- }
-
- emit_insn (insn (operands[0], operands[1]));
- DONE;
-})
-
-(define_expand "avx2_inserti128"
- [(match_operand:V4DI 0 "register_operand")
- (match_operand:V4DI 1 "register_operand")
- (match_operand:V2DI 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_1_operand")]
- "TARGET_AVX2"
-{
- rtx (*insn)(rtx, rtx, rtx);
-
- switch (INTVAL (operands[3]))
- {
- case 0:
- insn = gen_avx2_vec_set_lo_v4di;
- break;
- case 1:
- insn = gen_avx2_vec_set_hi_v4di;
- break;
- default:
- gcc_unreachable ();
- }
-
- emit_insn (insn (operands[0], operands[1], operands[2]));
- DONE;
-})
-
-(define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
- [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
- (ashiftrt:VI48_AVX512F
- (match_operand:VI48_AVX512F 1 "register_operand" "v")
- (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
+(define_insn "<avx2_avx512bw>_ashrv<mode><mask_name>"
+ [(set (match_operand:VI248_AVX512 0 "register_operand" "=v")
+ (ashiftrt:VI248_AVX512
+ (match_operand:VI248_AVX512 1 "register_operand" "v")
+ (match_operand:VI248_AVX512 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX2 && <mask_mode512bit_condition>"
"vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseishft")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
- [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
- (any_lshift:VI48_AVX2_48_AVX512F
- (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
- (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
+(define_insn "<avx2_avx512bw>_<shift_insn>v<mode><mask_name>"
+ [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v")
+ (any_lshift:VI248_AVX512BW
+ (match_operand:VI248_AVX512BW 1 "register_operand" "v")
+ (match_operand:VI248_AVX512BW 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX2 && <mask_mode512bit_condition>"
"vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseishft")
@@ -15006,35 +16909,35 @@
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vcvtph2ps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "vcvtph2ps<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_select:V4SF
- (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
+ (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
UNSPEC_VCVTPH2PS)
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
- "TARGET_F16C"
- "vcvtph2ps\t{%1, %0|%0, %1}"
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4SF")])
-(define_insn "*vcvtph2ps_load"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
+(define_insn "*vcvtph2ps_load<mask_name>"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
(unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
UNSPEC_VCVTPH2PS))]
- "TARGET_F16C"
- "vcvtph2ps\t{%1, %0|%0, %1}"
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
-(define_insn "vcvtph2ps256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
+(define_insn "vcvtph2ps256<mask_name>"
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
+ (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
UNSPEC_VCVTPH2PS))]
- "TARGET_F16C"
- "vcvtph2ps\t{%1, %0|%0, %1}"
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "vex")
(set_attr "btver2_decode" "double")
@@ -15051,6 +16954,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
+(define_expand "vcvtps2ph_mask"
+ [(set (match_operand:V8HI 0 "register_operand")
+ (vec_merge:V8HI
+ (vec_concat:V8HI
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")]
+ UNSPEC_VCVTPS2PH)
+ (match_dup 5))
+ (match_operand:V8HI 3 "vector_move_operand")
+ (match_operand:QI 4 "register_operand")))]
+ "TARGET_AVX512VL"
+ "operands[5] = CONST0_RTX (V4HImode);")
+
(define_expand "vcvtps2ph"
[(set (match_operand:V8HI 0 "register_operand")
(vec_concat:V8HI
@@ -15061,39 +16977,39 @@
"TARGET_F16C"
"operands[3] = CONST0_RTX (V4HImode);")
-(define_insn "*vcvtps2ph"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
+(define_insn "*vcvtps2ph<mask_name>"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
(vec_concat:V8HI
- (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand" "N")]
UNSPEC_VCVTPS2PH)
(match_operand:V4HI 3 "const0_operand")))]
- "TARGET_F16C"
- "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ "(TARGET_F16C && !<mask_applied>) || TARGET_AVX512VL"
+ "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4SF")])
-(define_insn "*vcvtps2ph_store"
+(define_insn "*vcvtps2ph_store<mask_name>"
[(set (match_operand:V4HI 0 "memory_operand" "=m")
(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
(match_operand:SI 2 "const_0_to_255_operand" "N")]
UNSPEC_VCVTPS2PH))]
- "TARGET_F16C"
- "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4SF")])
-(define_insn "vcvtps2ph256"
+(define_insn "vcvtps2ph256<mask_name>"
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
(match_operand:SI 2 "const_0_to_255_operand" "N")]
UNSPEC_VCVTPS2PH))]
- "TARGET_F16C"
- "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "btver2_decode" "vector")
(set_attr "mode" "V8SF")])
@@ -15303,10 +17219,10 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_gathersi<mode>"
- [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand")
+(define_expand "<avx512>_gathersi<mode>"
+ [(parallel [(set (match_operand:VI48F 0 "register_operand")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "register_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")
(mem:<ssescalarmode>
(match_par_dup 6
@@ -15323,9 +17239,9 @@
})
(define_insn "*avx512f_gathersi<mode>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand" "0")
+ [(set (match_operand:VI48F 0 "register_operand" "=&v")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "register_operand" "0")
(match_operand:<avx512fmaskmode> 7 "register_operand" "2")
(match_operator:<ssescalarmode> 6 "vsib_mem_operator"
[(unspec:P
@@ -15342,8 +17258,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*avx512f_gathersi<mode>_2"
- [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
- (unspec:VI48F_512
+ [(set (match_operand:VI48F 0 "register_operand" "=&v")
+ (unspec:VI48F
[(pc)
(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
(match_operator:<ssescalarmode> 5 "vsib_mem_operator"
@@ -15361,9 +17277,9 @@
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_gatherdi<mode>"
- [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
- (unspec:VI48F_512
+(define_expand "<avx512>_gatherdi<mode>"
+ [(parallel [(set (match_operand:VI48F 0 "register_operand")
+ (unspec:VI48F
[(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
(match_operand:QI 4 "register_operand")
(mem:<ssescalarmode>
@@ -15381,8 +17297,8 @@
})
(define_insn "*avx512f_gatherdi<mode>"
- [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
- (unspec:VI48F_512
+ [(set (match_operand:VI48F 0 "register_operand" "=&v")
+ (unspec:VI48F
[(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
(match_operand:QI 7 "register_operand" "2")
(match_operator:<ssescalarmode> 6 "vsib_mem_operator"
@@ -15400,8 +17316,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*avx512f_gatherdi<mode>_2"
- [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
- (unspec:VI48F_512
+ [(set (match_operand:VI48F 0 "register_operand" "=&v")
+ (unspec:VI48F
[(pc)
(match_operand:QI 6 "register_operand" "1")
(match_operator:<ssescalarmode> 5 "vsib_mem_operator"
@@ -15415,22 +17331,27 @@
"TARGET_AVX512F"
{
if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
+ {
+ if (GET_MODE_SIZE (<MODE>mode) != 64)
+        return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
+ else
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
+ }
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_scattersi<mode>"
- [(parallel [(set (mem:VI48F_512
+(define_expand "<avx512>_scattersi<mode>"
+ [(parallel [(set (mem:VI48F
(match_par_dup 5
[(match_operand 0 "vsib_address_operand")
(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
(match_operand:SI 4 "const1248_operand")]))
- (unspec:VI48F_512
+ (unspec:VI48F
[(match_operand:<avx512fmaskmode> 1 "register_operand")
- (match_operand:VI48F_512 3 "register_operand")]
+ (match_operand:VI48F 3 "register_operand")]
UNSPEC_SCATTER))
(clobber (match_scratch:<avx512fmaskmode> 6))])]
"TARGET_AVX512F"
@@ -15441,15 +17362,15 @@
})
(define_insn "*avx512f_scattersi<mode>"
- [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
+ [(set (match_operator:VI48F 5 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 0 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
(match_operand:SI 4 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
- (unspec:VI48F_512
+ (unspec:VI48F
[(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
- (match_operand:VI48F_512 3 "register_operand" "v")]
+ (match_operand:VI48F 3 "register_operand" "v")]
UNSPEC_SCATTER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
"TARGET_AVX512F"
@@ -15458,13 +17379,13 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_scatterdi<mode>"
- [(parallel [(set (mem:VI48F_512
+(define_expand "<avx512>_scatterdi<mode>"
+ [(parallel [(set (mem:VI48F
(match_par_dup 5
[(match_operand 0 "vsib_address_operand")
- (match_operand:V8DI 2 "register_operand")
+ (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
(match_operand:SI 4 "const1248_operand")]))
- (unspec:VI48F_512
+ (unspec:VI48F
[(match_operand:QI 1 "register_operand")
(match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
UNSPEC_SCATTER))
@@ -15477,13 +17398,13 @@
})
(define_insn "*avx512f_scatterdi<mode>"
- [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
+ [(set (match_operator:VI48F 5 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 0 "vsib_address_operand" "Tv")
- (match_operand:V8DI 2 "register_operand" "v")
+ (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
(match_operand:SI 4 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
- (unspec:VI48F_512
+ (unspec:VI48F
[(match_operand:QI 6 "register_operand" "1")
(match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
UNSPEC_SCATTER))
@@ -15494,11 +17415,11 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_compress<mode>_mask"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "vector_move_operand" "0C")
+(define_insn "<avx512>_compress<mode>_mask"
+ [(set (match_operand:VI48F 0 "register_operand" "=v")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "register_operand" "v")
+ (match_operand:VI48F 2 "vector_move_operand" "0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
UNSPEC_COMPRESS))]
"TARGET_AVX512F"
@@ -15507,10 +17428,10 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_compressstore<mode>_mask"
- [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "register_operand" "x")
+(define_insn "<avx512>_compressstore<mode>_mask"
+ [(set (match_operand:VI48F 0 "memory_operand" "=m")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "register_operand" "x")
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
UNSPEC_COMPRESS_STORE))]
@@ -15521,21 +17442,21 @@
(set_attr "memory" "store")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "avx512f_expand<mode>_maskz"
- [(set (match_operand:VI48F_512 0 "register_operand")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "nonimmediate_operand")
- (match_operand:VI48F_512 2 "vector_move_operand")
+(define_expand "<avx512>_expand<mode>_maskz"
+ [(set (match_operand:VI48F 0 "register_operand")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "nonimmediate_operand")
+ (match_operand:VI48F 2 "vector_move_operand")
(match_operand:<avx512fmaskmode> 3 "register_operand")]
UNSPEC_EXPAND))]
"TARGET_AVX512F"
"operands[2] = CONST0_RTX (<MODE>mode);")
-(define_insn "avx512f_expand<mode>_mask"
- [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
- (unspec:VI48F_512
- [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
- (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
+(define_insn "<avx512>_expand<mode>_mask"
+ [(set (match_operand:VI48F 0 "register_operand" "=v,v")
+ (unspec:VI48F
+ [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
+ (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
UNSPEC_EXPAND))]
"TARGET_AVX512F"
@@ -15545,10 +17466,67 @@
(set_attr "memory" "none,load")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_getmant<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_15_operand")]
+ UNSPEC_RANGE))]
+ "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
+ "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512dq_ranges<mode><round_saeonly_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_15_operand")]
+ UNSPEC_RANGE_SCALAR)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512DQ"
+ "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VF_AVX512VL 1 "register_operand" "v")
+ (match_operand:QI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_FPCLASS))]
+ "TARGET_AVX512DQ"
+  "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512dq_vmfpclass<mode>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+ (and:<avx512fmaskmode>
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:QI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_FPCLASS_SCALAR)
+ (const_int 1)))]
+ "TARGET_AVX512DQ"
+  "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512VL
+ [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 2 "const_0_to_15_operand")]
UNSPEC_GETMANT))]
"TARGET_AVX512F"
@@ -15556,7 +17534,7 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_getmant<mode><round_saeonly_name>"
+(define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -15571,10 +17549,25 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
+(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
+ [(set (match_operand:VI2_AVX512F 0 "register_operand" "=v")
+ (unspec:VI2_AVX512F
+ [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
+ (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_255_operand")]
+ UNSPEC_DBPSADBW))]
+ "TARGET_AVX512BW && <mask_mode512bit_condition>"
+ "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
+ [(set_attr "isa" "avx")
+ (set_attr "type" "sselog1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "clz<mode>2<mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (clz:VI48_512
- (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (clz:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512CD"
"vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
@@ -15582,9 +17575,9 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<mask_codefor>conflict<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (unspec:VI48_512
- [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI48_AVX512VL
+ [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
UNSPEC_CONFLICT))]
"TARGET_AVX512CD"
"vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 1654cbae6ab..8826533cf33 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -20,8 +20,8 @@
;; Some iterators for extending subst as much as possible
;; All vectors (use for destinations)
(define_mode_iterator SUBST_V
- [V16QI
- V16HI V8HI
+ [V64QI V32QI V16QI
+ V32HI V16HI V8HI
V16SI V8SI V4SI
V8DI V4DI V2DI
V16SF V8SF V4SF
@@ -31,8 +31,8 @@
[QI HI SI DI])
(define_mode_iterator SUBST_A
- [V16QI
- V16HI V8HI
+ [V64QI V32QI V16QI
+ V32HI V16HI V8HI
V16SI V8SI V4SI
V8DI V4DI V2DI
V16SF V8SF V4SF
@@ -47,16 +47,20 @@
(define_subst_attr "mask_operand3_1" "mask" "" "%%{%%4%%}%%N3") ;; for sprintf
(define_subst_attr "mask_operand4" "mask" "" "%{%5%}%N4")
(define_subst_attr "mask_operand6" "mask" "" "%{%7%}%N6")
+(define_subst_attr "mask_operand7" "mask" "" "%{%8%}%N7")
+(define_subst_attr "mask_operand10" "mask" "" "%{%11%}%N10")
(define_subst_attr "mask_operand11" "mask" "" "%{%12%}%N11")
(define_subst_attr "mask_operand18" "mask" "" "%{%19%}%N18")
(define_subst_attr "mask_operand19" "mask" "" "%{%20%}%N19")
(define_subst_attr "mask_codefor" "mask" "*" "")
-(define_subst_attr "mask_mode512bit_condition" "mask" "1" "(<MODE_SIZE> == 64)")
+(define_subst_attr "mask_operand_arg34" "mask" "" ", operands[3], operands[4]")
+(define_subst_attr "mask_mode512bit_condition" "mask" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64 || TARGET_AVX512VL)")
(define_subst_attr "store_mask_constraint" "mask" "vm" "v")
(define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" "register_operand")
(define_subst_attr "mask_prefix" "mask" "vex" "evex")
(define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex")
(define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex")
+(define_subst_attr "mask_expand_op3" "mask" "3" "5")
(define_subst "mask"
[(set (match_operand:SUBST_V 0)
@@ -85,7 +89,7 @@
(define_subst_attr "sd_mask_op4" "sd" "" "%{%5%}%N4")
(define_subst_attr "sd_mask_op5" "sd" "" "%{%6%}%N5")
(define_subst_attr "sd_mask_codefor" "sd" "*" "")
-(define_subst_attr "sd_mask_mode512bit_condition" "sd" "1" "(<MODE_SIZE> == 64)")
+(define_subst_attr "sd_mask_mode512bit_condition" "sd" "1" "(<MODE_SIZE> == 64 || TARGET_AVX512VL)")
(define_subst "sd"
[(set (match_operand:SUBST_V 0)
@@ -101,6 +105,7 @@
(define_subst_attr "round_name" "round" "" "_round")
(define_subst_attr "round_mask_operand2" "mask" "%R2" "%R4")
(define_subst_attr "round_mask_operand3" "mask" "%R3" "%R5")
+(define_subst_attr "round_mask_operand4" "mask" "%R4" "%R6")
(define_subst_attr "round_sd_mask_operand4" "sd" "%R4" "%R6")
(define_subst_attr "round_op2" "round" "" "%R2")
(define_subst_attr "round_op3" "round" "" "%R3")
@@ -109,15 +114,19 @@
(define_subst_attr "round_op6" "round" "" "%R6")
(define_subst_attr "round_mask_op2" "round" "" "<round_mask_operand2>")
(define_subst_attr "round_mask_op3" "round" "" "<round_mask_operand3>")
-(define_subst_attr "round_mask_scalar_op3" "round" "" "<round_mask_scalar_operand3>")
+(define_subst_attr "round_mask_op4" "round" "" "<round_mask_operand4>")
(define_subst_attr "round_sd_mask_op4" "round" "" "<round_sd_mask_operand4>")
(define_subst_attr "round_constraint" "round" "vm" "v")
(define_subst_attr "round_constraint2" "round" "m" "v")
(define_subst_attr "round_constraint3" "round" "rm" "r")
(define_subst_attr "round_nimm_predicate" "round" "nonimmediate_operand" "register_operand")
(define_subst_attr "round_prefix" "round" "vex" "evex")
-(define_subst_attr "round_mode512bit_condition" "round" "1" "(<MODE>mode == V16SFmode || <MODE>mode == V8DFmode)")
-(define_subst_attr "round_modev4sf_condition" "round" "1" "(<MODE>mode == V4SFmode)")
+(define_subst_attr "round_mode512bit_condition" "round" "1" "(GET_MODE (operands[0]) == V16SFmode
+ || GET_MODE (operands[0]) == V8DFmode
+ || GET_MODE (operands[0]) == V8DImode
+ || GET_MODE (operands[0]) == V16SImode)")
+(define_subst_attr "round_modev8sf_condition" "round" "1" "(GET_MODE (operands[0]) == V8SFmode)")
+(define_subst_attr "round_modev4sf_condition" "round" "1" "(GET_MODE (operands[0]) == V4SFmode)")
(define_subst_attr "round_codefor" "round" "*" "")
(define_subst_attr "round_opnum" "round" "5" "6")
@@ -133,6 +142,7 @@
(define_subst_attr "round_saeonly_name" "round_saeonly" "" "_round")
(define_subst_attr "round_saeonly_mask_operand2" "mask" "%r2" "%r4")
(define_subst_attr "round_saeonly_mask_operand3" "mask" "%r3" "%r5")
+(define_subst_attr "round_saeonly_mask_operand4" "mask" "%r4" "%r6")
(define_subst_attr "round_saeonly_mask_scalar_merge_operand4" "mask_scalar_merge" "%r4" "%r5")
(define_subst_attr "round_saeonly_sd_mask_operand5" "sd" "%r5" "%r7")
(define_subst_attr "round_saeonly_op2" "round_saeonly" "" "%r2")
@@ -143,12 +153,17 @@
(define_subst_attr "round_saeonly_prefix" "round_saeonly" "vex" "evex")
(define_subst_attr "round_saeonly_mask_op2" "round_saeonly" "" "<round_saeonly_mask_operand2>")
(define_subst_attr "round_saeonly_mask_op3" "round_saeonly" "" "<round_saeonly_mask_operand3>")
+(define_subst_attr "round_saeonly_mask_op4" "round_saeonly" "" "<round_saeonly_mask_operand4>")
(define_subst_attr "round_saeonly_mask_scalar_merge_op4" "round_saeonly" "" "<round_saeonly_mask_scalar_merge_operand4>")
(define_subst_attr "round_saeonly_sd_mask_op5" "round_saeonly" "" "<round_saeonly_sd_mask_operand5>")
(define_subst_attr "round_saeonly_constraint" "round_saeonly" "vm" "v")
(define_subst_attr "round_saeonly_constraint2" "round_saeonly" "m" "v")
(define_subst_attr "round_saeonly_nimm_predicate" "round_saeonly" "nonimmediate_operand" "register_operand")
-(define_subst_attr "round_saeonly_mode512bit_condition" "round_saeonly" "1" "(<MODE>mode == V16SFmode || <MODE>mode == V8DFmode)")
+(define_subst_attr "round_saeonly_mode512bit_condition" "round_saeonly" "1" "(<MODE>mode == V16SFmode
+ || <MODE>mode == V8DFmode
+ || <MODE>mode == V8DImode
+ || <MODE>mode == V16SImode)")
+(define_subst_attr "round_saeonly_modev8sf_condition" "round_saeonly" "1" "(<MODE>mode == V8SFmode)")
(define_subst "round_saeonly"
[(set (match_operand:SUBST_A 0)
@@ -196,3 +211,19 @@
(match_dup 4)
(match_dup 5)
(unspec [(match_operand:SI 6 "const48_operand")] UNSPEC_EMBEDDED_ROUNDING)])
+
+(define_subst_attr "mask_expand4_name" "mask_expand4" "" "_mask")
+(define_subst_attr "mask_expand4_args" "mask_expand4" "" ", operands[4], operands[5]")
+
+(define_subst "mask_expand4"
+ [(match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1)
+ (match_operand:SUBST_V 2)
+ (match_operand:SI 3)]
+ "TARGET_AVX512VL"
+ [(match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_operand:SUBST_V 4 "vector_move_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")])
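mask_expand4 clones a four-operand define_expand into a "_mask" variant gated on TARGET_AVX512VL, appending a merge source and a mask register; mask_expand4_args passes the two extras through to the generator call. At the intrinsic level the resulting pair looks like this (an assumed illustration requiring AVX512F + AVX512VL, not code from this patch):

    #include <immintrin.h>

    /* Unmasked form: plain three-operand vector add.  */
    __m256i
    plain_add (__m256i a, __m256i b)
    {
      return _mm256_add_epi32 (a, b);
    }

    /* _mask form: carries the extra merge source w and mask k that
       mask_expand4_args splices into the generator call.  */
    __m256i
    masked_add (__m256i w, __mmask8 k, __m256i a, __m256i b)
    {
      return _mm256_mask_add_epi32 (w, k, a, b);
    }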