Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r--  gcc/config/i386/i386.c  | 2848
1 file changed, 2696 insertions(+), 152 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0c546af00b2..73854fc9beb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2188,6 +2188,14 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
/* SSE REX registers */
SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
SSE_REGS, SSE_REGS,
+ /* 16 new SSE registers */
+ EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
+ EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
+ EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
+ EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
+ /* Mask registers. */
+ MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
+ MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
};
/* The "default" register map used in 32bit mode. */
@@ -2201,6 +2209,9 @@ int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 16-23 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 24-31 */
+ 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
};
/* The "default" register map used in 64bit mode. */
@@ -2214,6 +2225,9 @@ int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
8,9,10,11,12,13,14,15, /* extended integer registers */
25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
+ 67, 68, 69, 70, 71, 72, 73, 74, /* new SSE registers 16-23 */
+ 75, 76, 77, 78, 79, 80, 81, 82, /* new SSE registers 24-31 */
+ 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
@@ -2279,6 +2293,9 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 16-23 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 24-31 */
+ 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
};
/* Define parameter passing and return registers. */
@@ -2439,7 +2456,7 @@ enum x86_64_reg_class
X86_64_MEMORY_CLASS
};
-#define MAX_CLASSES 4
+#define MAX_CLASSES 8
/* Table of constants used by fldpi, fldln2, etc.... */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
@@ -2578,7 +2595,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
static bool
gate_insert_vzeroupper (void)
{
- return TARGET_AVX && TARGET_VZEROUPPER;
+ return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
}
static unsigned int
@@ -2651,6 +2668,10 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
{ "-mfma", OPTION_MASK_ISA_FMA },
{ "-mxop", OPTION_MASK_ISA_XOP },
{ "-mlwp", OPTION_MASK_ISA_LWP },
+ { "-mavx512f", OPTION_MASK_ISA_AVX512F },
+ { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
+ { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
+ { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
{ "-msse4a", OPTION_MASK_ISA_SSE4A },
{ "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
{ "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
@@ -2675,6 +2696,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
{ "-mmovbe", OPTION_MASK_ISA_MOVBE },
{ "-mcrc32", OPTION_MASK_ISA_CRC32 },
{ "-maes", OPTION_MASK_ISA_AES },
+ { "-msha", OPTION_MASK_ISA_SHA },
{ "-mpclmul", OPTION_MASK_ISA_PCLMUL },
{ "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
{ "-mrdrnd", OPTION_MASK_ISA_RDRND },
@@ -2955,6 +2977,11 @@ ix86_option_override_internal (bool main_args_p)
#define PTA_FXSR (HOST_WIDE_INT_1 << 37)
#define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
#define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
+#define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
+#define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
+#define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
+#define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
+#define PTA_SHA (HOST_WIDE_INT_1 << 44)
/* if this reaches 64, need to widen struct pta flags below */
@@ -3440,6 +3467,9 @@ ix86_option_override_internal (bool main_args_p)
if (processor_alias_table[i].flags & PTA_AES
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
ix86_isa_flags |= OPTION_MASK_ISA_AES;
+ if (processor_alias_table[i].flags & PTA_SHA
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
+ ix86_isa_flags |= OPTION_MASK_ISA_SHA;
if (processor_alias_table[i].flags & PTA_PCLMUL
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
@@ -3476,6 +3506,18 @@ ix86_option_override_internal (bool main_args_p)
if (processor_alias_table[i].flags & PTA_XSAVEOPT
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
+ if (processor_alias_table[i].flags & PTA_AVX512F
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
+ if (processor_alias_table[i].flags & PTA_AVX512ER
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
+ if (processor_alias_table[i].flags & PTA_AVX512PF
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
+ if (processor_alias_table[i].flags & PTA_AVX512CD
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
@@ -3940,22 +3982,22 @@ ix86_option_override_internal (bool main_args_p)
TARGET_AVX with -fexpensive-optimizations and split 32-byte
AVX unaligned load/store. */
if (!optimize_size)
- {
- if (flag_expensive_optimizations
- && !(target_flags_explicit & MASK_VZEROUPPER))
+ {
+ if (flag_expensive_optimizations
+ && !(target_flags_explicit & MASK_VZEROUPPER))
target_flags |= MASK_VZEROUPPER;
- if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
- && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+ if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
- if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
- && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+ if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
- /* Enable 128-bit AVX instruction generation
- for the auto-vectorizer. */
- if (TARGET_AVX128_OPTIMAL
- && !(target_flags_explicit & MASK_PREFER_AVX128))
+ /* Enable 128-bit AVX instruction generation
+ for the auto-vectorizer. */
+ if (TARGET_AVX128_OPTIMAL
+ && !(target_flags_explicit & MASK_PREFER_AVX128))
target_flags |= MASK_PREFER_AVX128;
- }
+ }
if (ix86_recip_name)
{
@@ -4060,6 +4102,8 @@ ix86_conditional_register_usage (void)
fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
+ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
/* See the definition of CALL_USED_REGISTERS in i386.h. */
@@ -4100,6 +4144,13 @@ ix86_conditional_register_usage (void)
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
+
+ /* If AVX512F is disabled, squash the registers. */
+ if (! TARGET_AVX512F)
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)MASK_REGS], i)
+ || TEST_HARD_REG_BIT (reg_class_contents[(int)EVEX_SSE_REGS], i))
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
@@ -4242,8 +4293,13 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
IX86_ATTR_ISA ("tbm", OPT_mtbm),
IX86_ATTR_ISA ("aes", OPT_maes),
+ IX86_ATTR_ISA ("sha", OPT_msha),
IX86_ATTR_ISA ("avx", OPT_mavx),
IX86_ATTR_ISA ("avx2", OPT_mavx2),
+ IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
+ IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
+ IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
+ IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
IX86_ATTR_ISA ("mmx", OPT_mmmx),
IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
@@ -6014,7 +6070,7 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
sized containers, classes[0] will be NO_CLASS and 1 is returned.
BIT_OFFSET is used internally for handling records and specifies offset
- of the offset in bits modulo 256 to avoid overflow cases.
+ of the offset in bits modulo 512 to avoid overflow cases.
See the x86-64 PS ABI for details.
*/
@@ -6042,8 +6098,8 @@ classify_argument (enum machine_mode mode, const_tree type,
tree field;
enum x86_64_reg_class subclasses[MAX_CLASSES];
- /* On x86-64 we pass structures larger than 32 bytes on the stack. */
- if (bytes > 32)
+ /* On x86-64 we pass structures larger than 64 bytes on the stack. */
+ if (bytes > 64)
return 0;
for (i = 0; i < words; i++)
@@ -6114,7 +6170,7 @@ classify_argument (enum machine_mode mode, const_tree type,
num = classify_argument (TYPE_MODE (type), type,
subclasses,
(int_bit_position (field)
- + bit_offset) % 256);
+ + bit_offset) % 512);
if (!num)
return 0;
pos = (int_bit_position (field)
@@ -6364,6 +6420,21 @@ classify_argument (enum machine_mode mode, const_tree type,
classes[2] = X86_64_SSEUP_CLASS;
classes[3] = X86_64_SSEUP_CLASS;
return 4;
+ case V8DFmode:
+ case V16SFmode:
+ case V8DImode:
+ case V16SImode:
+ case V32HImode:
+ case V64QImode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ classes[2] = X86_64_SSEUP_CLASS;
+ classes[3] = X86_64_SSEUP_CLASS;
+ classes[4] = X86_64_SSEUP_CLASS;
+ classes[5] = X86_64_SSEUP_CLASS;
+ classes[6] = X86_64_SSEUP_CLASS;
+ classes[7] = X86_64_SSEUP_CLASS;
+ return 8;
case V4SFmode:
case V4SImode:
case V16QImode:
@@ -6549,6 +6620,18 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode,
&& mode != BLKmode)
return gen_reg_or_parallel (mode, orig_mode,
SSE_REGNO (sse_regno));
+ if (n == 8
+ && regclass[0] == X86_64_SSE_CLASS
+ && regclass[1] == X86_64_SSEUP_CLASS
+ && regclass[2] == X86_64_SSEUP_CLASS
+ && regclass[3] == X86_64_SSEUP_CLASS
+ && regclass[4] == X86_64_SSEUP_CLASS
+ && regclass[5] == X86_64_SSEUP_CLASS
+ && regclass[6] == X86_64_SSEUP_CLASS
+ && regclass[7] == X86_64_SSEUP_CLASS
+ && mode != BLKmode)
+ return gen_reg_or_parallel (mode, orig_mode,
+ SSE_REGNO (sse_regno));
if (n == 2
&& regclass[0] == X86_64_X87_CLASS
&& regclass[1] == X86_64_X87UP_CLASS)
@@ -6630,6 +6713,18 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode,
tmpmode = OImode;
i += 3;
break;
+ case 8:
+ gcc_assert (i == 0
+ && regclass[1] == X86_64_SSEUP_CLASS
+ && regclass[2] == X86_64_SSEUP_CLASS
+ && regclass[3] == X86_64_SSEUP_CLASS
+ && regclass[4] == X86_64_SSEUP_CLASS
+ && regclass[5] == X86_64_SSEUP_CLASS
+ && regclass[6] == X86_64_SSEUP_CLASS
+ && regclass[7] == X86_64_SSEUP_CLASS);
+ tmpmode = XImode;
+ i += 7;
+ break;
default:
gcc_unreachable ();
}
@@ -6703,6 +6798,12 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
case V8SFmode:
case V8SImode:
+ case V64QImode:
+ case V32HImode:
+ case V16SImode:
+ case V8DImode:
+ case V16SFmode:
+ case V8DFmode:
case V32QImode:
case V16HImode:
case V4DFmode:
@@ -6754,8 +6855,9 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
{
int int_nregs, sse_nregs;
- /* Unnamed 256bit vector mode parameters are passed on stack. */
- if (!named && VALID_AVX256_REG_MODE (mode))
+ /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
+ if (!named && (VALID_AVX512F_REG_MODE (mode)
+ || VALID_AVX256_REG_MODE (mode)))
return;
if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
@@ -6906,9 +7008,16 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
break;
case OImode:
- /* OImode shouldn't be used directly. */
+ case XImode:
+ /* OImode and XImode shouldn't be used directly. */
gcc_unreachable ();
+ case V64QImode:
+ case V32HImode:
+ case V16SImode:
+ case V8DImode:
+ case V16SFmode:
+ case V8DFmode:
case V8SFmode:
case V8SImode:
case V32QImode:
@@ -6971,7 +7080,13 @@ function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
case V16HImode:
case V4DFmode:
case V4DImode:
- /* Unnamed 256bit vector mode parameters are passed on stack. */
+ case V16SFmode:
+ case V16SImode:
+ case V64QImode:
+ case V32HImode:
+ case V8DFmode:
+ case V8DImode:
+ /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
if (!named)
return NULL;
break;
@@ -7367,6 +7482,10 @@ function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
regno = FIRST_SSE_REG;
+ /* 64-byte vector modes in %zmm0. */
+ else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
+ regno = FIRST_SSE_REG;
+
/* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
regno = FIRST_FLOAT_REG;
@@ -7574,6 +7693,10 @@ return_in_memory_32 (const_tree type, enum machine_mode mode)
/* AVX values are returned in YMM0, except when it doesn't exist. */
if (size == 32)
return !TARGET_AVX;
+
+ /* AVX512F values are returned in ZMM0, except when it doesn't exist. */
+ if (size == 64)
+ return !TARGET_AVX512F;
}
if (mode == XFmode)
@@ -8110,7 +8233,13 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
case V16HImode:
case V4DFmode:
case V4DImode:
- /* Unnamed 256bit vector mode parameters are passed on stack. */
+ case V16SFmode:
+ case V16SImode:
+ case V64QImode:
+ case V32HImode:
+ case V8DFmode:
+ case V8DImode:
+ /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
if (!TARGET_64BIT_MS_ABI)
{
container = NULL;
@@ -8525,6 +8654,12 @@ standard_sse_constant_p (rtx x)
case V4DImode:
if (TARGET_AVX2)
return 2;
+ case V64QImode:
+ case V32HImode:
+ case V16SImode:
+ case V8DImode:
+ if (TARGET_AVX512F)
+ return 2;
default:
break;
}
@@ -8543,6 +8678,11 @@ standard_sse_constant_opcode (rtx insn, rtx x)
case 1:
switch (get_attr_mode (insn))
{
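+ /* 512-bit all-zero constants are cleared with an EVEX-encoded integer xor. */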
+ case MODE_XI:
+ case MODE_V16SF:
+ return "vpxord\t%g0, %g0, %g0";
+ case MODE_V8DF:
+ return "vpxorq\t%g0, %g0, %g0";
case MODE_TI:
return "%vpxor\t%0, %d0";
case MODE_V2DF:
@@ -8562,6 +8702,10 @@ standard_sse_constant_opcode (rtx insn, rtx x)
}
case 2:
+ if (get_attr_mode (insn) == MODE_XI
+ || get_attr_mode (insn) == MODE_V8DF
+ || get_attr_mode (insn) == MODE_V16SF)
+ return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
if (TARGET_AVX)
return "vpcmpeqd\t%0, %0, %0";
else
@@ -14077,6 +14221,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
If CODE is 'q', pretend the mode is DImode.
If CODE is 'x', pretend the mode is V4SFmode.
If CODE is 't', pretend the mode is V8SFmode.
+ If CODE is 'g', pretend the mode is V16SFmode.
If CODE is 'h', pretend the reg is the 'high' byte register.
If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
If CODE is 'd', duplicate the operand for AVX instruction.
@@ -14122,6 +14267,8 @@ print_reg (rtx x, int code, FILE *file)
code = 16;
else if (code == 't')
code = 32;
+ else if (code == 'g')
+ code = 64;
else
code = GET_MODE_SIZE (GET_MODE (x));
@@ -14169,7 +14316,7 @@ print_reg (rtx x, int code, FILE *file)
case 8:
case 4:
case 12:
- if (! ANY_FP_REG_P (x))
+ if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
/* FALLTHRU */
case 16:
@@ -14195,6 +14342,14 @@ print_reg (rtx x, int code, FILE *file)
fputs (hi_reg_name[regno] + 1, file);
return;
}
+ case 64:
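+ /* 64-byte operand: print the zmm form of the SSE register name (e.g. zmm0). */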
+ if (SSE_REG_P (x))
+ {
+ gcc_assert (!duplicated);
+ putc ('z', file);
+ fputs (hi_reg_name[REGNO (x)] + 1, file);
+ return;
+ }
break;
default:
gcc_unreachable ();
@@ -14268,6 +14423,7 @@ get_some_local_dynamic_name (void)
q -- likewise, print the DImode name of the register.
x -- likewise, print the V4SFmode name of the register.
t -- likewise, print the V8SFmode name of the register.
+ g -- likewise, print the V16SFmode name of the register.
h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
y -- print "st(0)" instead of "st" as a register.
d -- print duplicated register operand for AVX instruction.
@@ -14497,6 +14653,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
case 'q':
case 'h':
case 't':
+ case 'g':
case 'y':
case 'x':
case 'X':
@@ -14695,6 +14852,43 @@ ix86_print_operand (FILE *file, rtx x, int code)
/* We do not want to print value of the operand. */
return;
+ case 'N':
+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
+ fputs ("{z}", file);
+ return;
+
+ case 'R':
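+ /* Print an AVX-512 embedded rounding / SAE specifier taken from a const_int operand. */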
+ gcc_assert (CONST_INT_P (x));
+
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ fputs (", ", file);
+
+ switch (INTVAL (x))
+ {
+ case ROUND_NEAREST_INT:
+ fputs ("{rn-sae}", file);
+ break;
+ case ROUND_NEG_INF:
+ fputs ("{rd-sae}", file);
+ break;
+ case ROUND_POS_INF:
+ fputs ("{ru-sae}", file);
+ break;
+ case ROUND_ZERO:
+ fputs ("{rz-sae}", file);
+ break;
+ case ROUND_SAE:
+ fputs ("{sae}", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fputs (", ", file);
+
+ return;
+
case '*':
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('*', file);
@@ -14805,6 +14999,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
size = "XMMWORD";
break;
case 32: size = "YMMWORD"; break;
+ case 64: size = "ZMMWORD"; break;
default:
gcc_unreachable ();
}
@@ -16371,8 +16566,8 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
gcc_unreachable ();
case V32QImode:
extract = gen_avx_vextractf128v32qi;
- load_unaligned = gen_avx_loaddqu256;
- store_unaligned = gen_avx_storedqu256;
+ load_unaligned = gen_avx_loaddquv32qi;
+ store_unaligned = gen_avx_storedquv32qi;
mode = V16QImode;
break;
case V8SFmode:
@@ -16475,10 +16670,56 @@ void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
rtx op0, op1, m;
+ rtx (*load_unaligned) (rtx, rtx);
+ rtx (*store_unaligned) (rtx, rtx);
op0 = operands[0];
op1 = operands[1];
+ if (GET_MODE_SIZE (mode) == 64)
+ {
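+ /* 64-byte (ZMM) modes: choose the AVX-512 unaligned load/store expander below. */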
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_VECTOR_INT:
+ case MODE_INT:
+ op0 = gen_lowpart (V16SImode, op0);
+ op1 = gen_lowpart (V16SImode, op1);
+ /* FALLTHRU */
+
+ case MODE_VECTOR_FLOAT:
+ switch (GET_MODE (op0))
+ {
+ default:
+ gcc_unreachable ();
+ case V16SImode:
+ load_unaligned = gen_avx512f_loaddquv16si;
+ store_unaligned = gen_avx512f_storedquv16si;
+ break;
+ case V16SFmode:
+ load_unaligned = gen_avx512f_loadups512;
+ store_unaligned = gen_avx512f_storeups512;
+ break;
+ case V8DFmode:
+ load_unaligned = gen_avx512f_loadupd512;
+ store_unaligned = gen_avx512f_storeupd512;
+ break;
+ }
+
+ if (MEM_P (op1))
+ emit_insn (load_unaligned (op0, op1));
+ else if (MEM_P (op0))
+ emit_insn (store_unaligned (op0, op1));
+ else
+ gcc_unreachable ();
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+ }
+
if (TARGET_AVX
&& GET_MODE_SIZE (mode) == 32)
{
@@ -16511,7 +16752,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
/* We will eventually emit movups based on insn attributes. */
- emit_insn (gen_sse2_loaddqu (op0, op1));
+ emit_insn (gen_sse2_loaddquv16qi (op0, op1));
}
else if (TARGET_SSE2 && mode == V2DFmode)
{
@@ -16586,7 +16827,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
/* We will eventually emit movups based on insn attributes. */
- emit_insn (gen_sse2_storedqu (op0, op1));
+ emit_insn (gen_sse2_storedquv16qi (op0, op1));
}
else if (TARGET_SSE2 && mode == V2DFmode)
{
@@ -18128,6 +18369,11 @@ ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
enum machine_mode fltmode = GET_MODE (target);
rtx (*cvt) (rtx, rtx);
+ if (intmode == V16SImode)
+ {
+ emit_insn (gen_ufloatv16siv16sf2 (target, val));
+ return;
+ }
if (intmode == V4SImode)
cvt = gen_floatv4siv4sf2;
else
@@ -18218,17 +18464,23 @@ ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
switch (mode)
{
+ case V64QImode:
case V32QImode:
case V16QImode:
+ case V32HImode:
case V16HImode:
case V8HImode:
+ case V16SImode:
case V8SImode:
case V4SImode:
+ case V8DImode:
case V4DImode:
case V2DImode:
gcc_assert (vect);
+ case V16SFmode:
case V8SFmode:
case V4SFmode:
+ case V8DFmode:
case V4DFmode:
case V2DFmode:
n_elt = GET_MODE_NUNITS (mode);
@@ -18265,6 +18517,8 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
/* Find the sign bit, sign extended to 2*HWI. */
switch (mode)
{
+ case V16SImode:
+ case V16SFmode:
case V8SImode:
case V4SImode:
case V8SFmode:
@@ -18275,8 +18529,10 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
lo = 0x80000000, hi = lo < 0;
break;
+ case V8DImode:
case V4DImode:
case V2DImode:
+ case V8DFmode:
case V4DFmode:
case V2DFmode:
vec_mode = mode;
@@ -20138,22 +20394,63 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
rtx op_true, rtx op_false)
{
enum machine_mode mode = GET_MODE (dest);
- enum machine_mode cmp_mode = GET_MODE (cmp_op0);
+ enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
+
+ /* In the general case the result of a comparison can differ from the operands' type. */
+ enum machine_mode cmp_mode;
+
+ /* In AVX512F the result of comparison is an integer mask. */
+ bool maskcmp = false;
rtx x;
- cmp_op0 = force_reg (cmp_mode, cmp_op0);
- if (!nonimmediate_operand (cmp_op1, cmp_mode))
- cmp_op1 = force_reg (cmp_mode, cmp_op1);
+ if (GET_MODE_SIZE (cmp_ops_mode) == 64)
+ {
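+ /* The comparison produces a mask with one bit per element, e.g. HImode for V16SI. */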
+ cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
+ gcc_assert (cmp_mode != BLKmode);
+
+ maskcmp = true;
+ }
+ else
+ cmp_mode = cmp_ops_mode;
+
+
+ cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
+ if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
+ cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
if (optimize
|| reg_overlap_mentioned_p (dest, op_true)
|| reg_overlap_mentioned_p (dest, op_false))
- dest = gen_reg_rtx (mode);
+ dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
+ /* Compare patterns for int modes are unspec in AVX512F only. */
+ if (maskcmp && (code == GT || code == EQ))
+ {
+ rtx (*gen)(rtx, rtx, rtx);
+
+ switch (cmp_ops_mode)
+ {
+ case V16SImode:
+ gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
+ break;
+ case V8DImode:
+ gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
+ break;
+ default:
+ gen = NULL;
+ }
+
+ if (gen)
+ {
+ emit_insn (gen (dest, cmp_op0, cmp_op1));
+ return dest;
+ }
+ }
x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
- if (cmp_mode != mode)
+
+ if (cmp_mode != mode && !maskcmp)
{
- x = force_reg (cmp_mode, x);
+ x = force_reg (cmp_ops_mode, x);
convert_move (dest, x, false);
}
else
@@ -20169,33 +20466,43 @@ static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
enum machine_mode mode = GET_MODE (dest);
+ enum machine_mode cmpmode = GET_MODE (cmp);
+
+ /* In AVX512F the result of comparison is an integer mask. */
+ bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
+
rtx t2, t3, x;
if (vector_all_ones_operand (op_true, mode)
- && rtx_equal_p (op_false, CONST0_RTX (mode)))
+ && rtx_equal_p (op_false, CONST0_RTX (mode))
+ && !maskcmp)
{
emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
}
- else if (op_false == CONST0_RTX (mode))
+ else if (op_false == CONST0_RTX (mode)
+ && !maskcmp)
{
op_true = force_reg (mode, op_true);
x = gen_rtx_AND (mode, cmp, op_true);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
- else if (op_true == CONST0_RTX (mode))
+ else if (op_true == CONST0_RTX (mode)
+ && !maskcmp)
{
op_false = force_reg (mode, op_false);
x = gen_rtx_NOT (mode, cmp);
x = gen_rtx_AND (mode, x, op_false);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
- else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
+ else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
+ && !maskcmp)
{
op_false = force_reg (mode, op_false);
x = gen_rtx_IOR (mode, cmp, op_false);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
- else if (TARGET_XOP)
+ else if (TARGET_XOP
+ && !maskcmp)
{
op_true = force_reg (mode, op_true);
@@ -20260,6 +20567,20 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
cmp = gen_lowpart (V32QImode, cmp);
}
break;
+
+ case V16SImode:
+ gen = gen_avx512f_blendmv16si;
+ break;
+ case V8DImode:
+ gen = gen_avx512f_blendmv8di;
+ break;
+ case V8DFmode:
+ gen = gen_avx512f_blendmv8df;
+ break;
+ case V16SFmode:
+ gen = gen_avx512f_blendmv16sf;
+ break;
+
default:
break;
}
@@ -20523,6 +20844,8 @@ ix86_expand_int_vcond (rtx operands[])
switch (mode)
{
+ case V16SImode:
+ case V8DImode:
case V8SImode:
case V4DImode:
case V4SImode:
@@ -20533,6 +20856,8 @@ ix86_expand_int_vcond (rtx operands[])
switch (mode)
{
+ case V16SImode: gen_sub3 = gen_subv16si3; break;
+ case V8DImode: gen_sub3 = gen_subv8di3; break;
case V8SImode: gen_sub3 = gen_subv8si3; break;
case V4DImode: gen_sub3 = gen_subv4di3; break;
case V4SImode: gen_sub3 = gen_subv4si3; break;
@@ -20589,7 +20914,8 @@ ix86_expand_int_vcond (rtx operands[])
x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
code, cop0, cop1,
operands[1+negate], operands[2-negate]);
- x = gen_lowpart (data_mode, x);
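+ /* With AVX-512 the comparison above may return a mask rather than a vector; take the lowpart only in the vector case. */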
+ if (GET_MODE (x) == mode)
+ x = gen_lowpart (data_mode, x);
}
ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
@@ -20597,6 +20923,35 @@ ix86_expand_int_vcond (rtx operands[])
return true;
}
+static void
+ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
+{
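+ /* Emit a two-source variable permute via the AVX-512 vpermi2var patterns; the index vector must be in a register. */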
+ enum machine_mode mode = GET_MODE (op0);
+ switch (mode)
+ {
+ case V16SImode:
+ emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
+ force_reg (V16SImode, mask),
+ op1));
+ return;
+ case V16SFmode:
+ emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
+ force_reg (V16SImode, mask),
+ op1));
+ return;
+ case V8DImode:
+ emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
+ force_reg (V8DImode, mask), op1));
+ return;
+ case V8DFmode:
+ emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
+ force_reg (V8DImode, mask), op1));
+ return;
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Expand a variable vector permutation. */
void
@@ -20615,7 +20970,14 @@ ix86_expand_vec_perm (rtx operands[])
/* Number of elements in the vector. */
w = GET_MODE_NUNITS (mode);
e = GET_MODE_UNIT_SIZE (mode);
- gcc_assert (w <= 32);
+ gcc_assert (w <= 64);
+
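+ /* For 512-bit modes use the vpermi2-based expander defined above. */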
+ if (mode == V8DFmode || mode == V8DImode
+ || mode == V16SFmode || mode == V16SImode)
+ {
+ ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1);
+ return;
+ }
if (TARGET_AVX2)
{
@@ -20969,6 +21331,15 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
extract
= high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
break;
+ case V32HImode:
+ if (unsigned_p)
+ unpack = gen_avx512f_zero_extendv16hiv16si2;
+ else
+ unpack = gen_avx512f_sign_extendv16hiv16si2;
+ halfmode = V16HImode;
+ extract
+ = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
+ break;
case V16HImode:
if (unsigned_p)
unpack = gen_avx2_zero_extendv8hiv8si2;
@@ -20978,6 +21349,15 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
extract
= high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
break;
+ case V16SImode:
+ if (unsigned_p)
+ unpack = gen_avx512f_zero_extendv8siv8di2;
+ else
+ unpack = gen_avx512f_sign_extendv8siv8di2;
+ halfmode = V8SImode;
+ extract
+ = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
+ break;
case V8SImode:
if (unsigned_p)
unpack = gen_avx2_zero_extendv4siv4di2;
@@ -21009,7 +21389,7 @@ ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
gcc_unreachable ();
}
- if (GET_MODE_SIZE (imode) == 32)
+ if (GET_MODE_SIZE (imode) >= 32)
{
tmp = gen_reg_rtx (halfmode);
emit_insn (extract (tmp, src));
@@ -25364,7 +25744,8 @@ ix86_constant_alignment (tree exp, int align)
int
ix86_data_alignment (tree type, int align, bool opt)
{
- int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
+ int max_align = optimize_size ? BITS_PER_WORD
+ : MIN (512, MAX_OFILE_ALIGNMENT);
if (opt
&& AGGREGATE_TYPE_P (type)
@@ -26821,12 +27202,421 @@ enum ix86_builtins
IX86_BUILTIN_GATHERDIV4SI,
IX86_BUILTIN_GATHERDIV8SI,
- /* Alternate 4 element gather for the vectorizer where
- all operands are 32-byte wide. */
+ /* AVX512F */
+ IX86_BUILTIN_ADDPD512,
+ IX86_BUILTIN_ADDPS512,
+ IX86_BUILTIN_ADDSD_MASK,
+ IX86_BUILTIN_ADDSS_MASK,
+ IX86_BUILTIN_ALIGND512,
+ IX86_BUILTIN_ALIGNQ512,
+ IX86_BUILTIN_BLENDMD512,
+ IX86_BUILTIN_BLENDMPD512,
+ IX86_BUILTIN_BLENDMPS512,
+ IX86_BUILTIN_BLENDMQ512,
+ IX86_BUILTIN_BROADCASTF32X4_512,
+ IX86_BUILTIN_BROADCASTF64X4_512,
+ IX86_BUILTIN_BROADCASTI32X4_512,
+ IX86_BUILTIN_BROADCASTI64X4_512,
+ IX86_BUILTIN_BROADCASTSD512,
+ IX86_BUILTIN_BROADCASTSS512,
+ IX86_BUILTIN_CMPD512,
+ IX86_BUILTIN_CMPPD512,
+ IX86_BUILTIN_CMPPS512,
+ IX86_BUILTIN_CMPQ512,
+ IX86_BUILTIN_CMPSD_MASK,
+ IX86_BUILTIN_CMPSS_MASK,
+ IX86_BUILTIN_COMIDF,
+ IX86_BUILTIN_COMISF,
+ IX86_BUILTIN_COMPRESSPD512,
+ IX86_BUILTIN_COMPRESSPDSTORE512,
+ IX86_BUILTIN_COMPRESSPS512,
+ IX86_BUILTIN_COMPRESSPSSTORE512,
+ IX86_BUILTIN_CVTDQ2PD512,
+ IX86_BUILTIN_CVTDQ2PS512,
+ IX86_BUILTIN_CVTPD2DQ512,
+ IX86_BUILTIN_CVTPD2PS512,
+ IX86_BUILTIN_CVTPD2UDQ512,
+ IX86_BUILTIN_CVTPH2PS512,
+ IX86_BUILTIN_CVTPS2DQ512,
+ IX86_BUILTIN_CVTPS2PD512,
+ IX86_BUILTIN_CVTPS2PH512,
+ IX86_BUILTIN_CVTPS2UDQ512,
+ IX86_BUILTIN_CVTSD2SS_MASK,
+ IX86_BUILTIN_CVTSI2SD64,
+ IX86_BUILTIN_CVTSI2SS32,
+ IX86_BUILTIN_CVTSI2SS64,
+ IX86_BUILTIN_CVTSS2SD_MASK,
+ IX86_BUILTIN_CVTTPD2DQ512,
+ IX86_BUILTIN_CVTTPD2UDQ512,
+ IX86_BUILTIN_CVTTPS2DQ512,
+ IX86_BUILTIN_CVTTPS2UDQ512,
+ IX86_BUILTIN_CVTUDQ2PD512,
+ IX86_BUILTIN_CVTUDQ2PS512,
+ IX86_BUILTIN_CVTUSI2SD32,
+ IX86_BUILTIN_CVTUSI2SD64,
+ IX86_BUILTIN_CVTUSI2SS32,
+ IX86_BUILTIN_CVTUSI2SS64,
+ IX86_BUILTIN_DIVPD512,
+ IX86_BUILTIN_DIVPS512,
+ IX86_BUILTIN_DIVSD_MASK,
+ IX86_BUILTIN_DIVSS_MASK,
+ IX86_BUILTIN_EXPANDPD512,
+ IX86_BUILTIN_EXPANDPD512Z,
+ IX86_BUILTIN_EXPANDPDLOAD512,
+ IX86_BUILTIN_EXPANDPDLOAD512Z,
+ IX86_BUILTIN_EXPANDPS512,
+ IX86_BUILTIN_EXPANDPS512Z,
+ IX86_BUILTIN_EXPANDPSLOAD512,
+ IX86_BUILTIN_EXPANDPSLOAD512Z,
+ IX86_BUILTIN_EXTRACTF32X4,
+ IX86_BUILTIN_EXTRACTF64X4,
+ IX86_BUILTIN_EXTRACTI32X4,
+ IX86_BUILTIN_EXTRACTI64X4,
+ IX86_BUILTIN_FIXUPIMMPD512_MASK,
+ IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
+ IX86_BUILTIN_FIXUPIMMPS512_MASK,
+ IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
+ IX86_BUILTIN_FIXUPIMMSD128_MASK,
+ IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
+ IX86_BUILTIN_FIXUPIMMSS128_MASK,
+ IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
+ IX86_BUILTIN_GETEXPPD512,
+ IX86_BUILTIN_GETEXPPS512,
+ IX86_BUILTIN_GETEXPSD128,
+ IX86_BUILTIN_GETEXPSS128,
+ IX86_BUILTIN_GETMANTPD512,
+ IX86_BUILTIN_GETMANTPS512,
+ IX86_BUILTIN_GETMANTSD128,
+ IX86_BUILTIN_GETMANTSS128,
+ IX86_BUILTIN_INSERTF32X4,
+ IX86_BUILTIN_INSERTF64X4,
+ IX86_BUILTIN_INSERTI32X4,
+ IX86_BUILTIN_INSERTI64X4,
+ IX86_BUILTIN_LOADAPD512,
+ IX86_BUILTIN_LOADAPS512,
+ IX86_BUILTIN_LOADDQUDI512,
+ IX86_BUILTIN_LOADDQUSI512,
+ IX86_BUILTIN_LOADSD,
+ IX86_BUILTIN_LOADSS,
+ IX86_BUILTIN_LOADUPD512,
+ IX86_BUILTIN_LOADUPS512,
+ IX86_BUILTIN_MAXPD512,
+ IX86_BUILTIN_MAXPS512,
+ IX86_BUILTIN_MAXSD_MASK,
+ IX86_BUILTIN_MAXSS_MASK,
+ IX86_BUILTIN_MINPD512,
+ IX86_BUILTIN_MINPS512,
+ IX86_BUILTIN_MINSD_MASK,
+ IX86_BUILTIN_MINSS_MASK,
+ IX86_BUILTIN_MOVAPD512,
+ IX86_BUILTIN_MOVAPS512,
+ IX86_BUILTIN_MOVDDUP512,
+ IX86_BUILTIN_MOVDQA32LOAD512,
+ IX86_BUILTIN_MOVDQA32STORE512,
+ IX86_BUILTIN_MOVDQA32_512,
+ IX86_BUILTIN_MOVDQA64LOAD512,
+ IX86_BUILTIN_MOVDQA64STORE512,
+ IX86_BUILTIN_MOVDQA64_512,
+ IX86_BUILTIN_MOVESD,
+ IX86_BUILTIN_MOVESS,
+ IX86_BUILTIN_MOVNTDQ512,
+ IX86_BUILTIN_MOVNTPD512,
+ IX86_BUILTIN_MOVNTPS512,
+ IX86_BUILTIN_MOVSHDUP512,
+ IX86_BUILTIN_MOVSLDUP512,
+ IX86_BUILTIN_MULPD512,
+ IX86_BUILTIN_MULPS512,
+ IX86_BUILTIN_MULSD_MASK,
+ IX86_BUILTIN_MULSS_MASK,
+ IX86_BUILTIN_PABSD512,
+ IX86_BUILTIN_PABSQ512,
+ IX86_BUILTIN_PADDD512,
+ IX86_BUILTIN_PADDQ512,
+ IX86_BUILTIN_PANDD512,
+ IX86_BUILTIN_PANDND512,
+ IX86_BUILTIN_PANDNQ512,
+ IX86_BUILTIN_PANDQ512,
+ IX86_BUILTIN_PBROADCASTD512,
+ IX86_BUILTIN_PBROADCASTD512_GPR,
+ IX86_BUILTIN_PBROADCASTMB512,
+ IX86_BUILTIN_PBROADCASTMW512,
+ IX86_BUILTIN_PBROADCASTQ512,
+ IX86_BUILTIN_PBROADCASTQ512_GPR,
+ IX86_BUILTIN_PBROADCASTQ512_MEM,
+ IX86_BUILTIN_PCMPEQD512_MASK,
+ IX86_BUILTIN_PCMPEQQ512_MASK,
+ IX86_BUILTIN_PCMPGTD512_MASK,
+ IX86_BUILTIN_PCMPGTQ512_MASK,
+ IX86_BUILTIN_PCOMPRESSD512,
+ IX86_BUILTIN_PCOMPRESSDSTORE512,
+ IX86_BUILTIN_PCOMPRESSQ512,
+ IX86_BUILTIN_PCOMPRESSQSTORE512,
+ IX86_BUILTIN_PEXPANDD512,
+ IX86_BUILTIN_PEXPANDD512Z,
+ IX86_BUILTIN_PEXPANDDLOAD512,
+ IX86_BUILTIN_PEXPANDDLOAD512Z,
+ IX86_BUILTIN_PEXPANDQ512,
+ IX86_BUILTIN_PEXPANDQ512Z,
+ IX86_BUILTIN_PEXPANDQLOAD512,
+ IX86_BUILTIN_PEXPANDQLOAD512Z,
+ IX86_BUILTIN_PMAXSD512,
+ IX86_BUILTIN_PMAXSQ512,
+ IX86_BUILTIN_PMAXUD512,
+ IX86_BUILTIN_PMAXUQ512,
+ IX86_BUILTIN_PMINSD512,
+ IX86_BUILTIN_PMINSQ512,
+ IX86_BUILTIN_PMINUD512,
+ IX86_BUILTIN_PMINUQ512,
+ IX86_BUILTIN_PMOVDB512,
+ IX86_BUILTIN_PMOVDW512,
+ IX86_BUILTIN_PMOVQB512,
+ IX86_BUILTIN_PMOVQD512,
+ IX86_BUILTIN_PMOVQW512,
+ IX86_BUILTIN_PMOVSDB512,
+ IX86_BUILTIN_PMOVSDW512,
+ IX86_BUILTIN_PMOVSQB512,
+ IX86_BUILTIN_PMOVSQD512,
+ IX86_BUILTIN_PMOVSQW512,
+ IX86_BUILTIN_PMOVSXBD512,
+ IX86_BUILTIN_PMOVSXBQ512,
+ IX86_BUILTIN_PMOVSXDQ512,
+ IX86_BUILTIN_PMOVSXWD512,
+ IX86_BUILTIN_PMOVSXWQ512,
+ IX86_BUILTIN_PMOVUSDB512,
+ IX86_BUILTIN_PMOVUSDW512,
+ IX86_BUILTIN_PMOVUSQB512,
+ IX86_BUILTIN_PMOVUSQD512,
+ IX86_BUILTIN_PMOVUSQW512,
+ IX86_BUILTIN_PMOVZXBD512,
+ IX86_BUILTIN_PMOVZXBQ512,
+ IX86_BUILTIN_PMOVZXDQ512,
+ IX86_BUILTIN_PMOVZXWD512,
+ IX86_BUILTIN_PMOVZXWQ512,
+ IX86_BUILTIN_PMULDQ512,
+ IX86_BUILTIN_PMULLD512,
+ IX86_BUILTIN_PMULUDQ512,
+ IX86_BUILTIN_PORD512,
+ IX86_BUILTIN_PORQ512,
+ IX86_BUILTIN_PROLD512,
+ IX86_BUILTIN_PROLQ512,
+ IX86_BUILTIN_PROLVD512,
+ IX86_BUILTIN_PROLVQ512,
+ IX86_BUILTIN_PRORD512,
+ IX86_BUILTIN_PRORQ512,
+ IX86_BUILTIN_PRORVD512,
+ IX86_BUILTIN_PRORVQ512,
+ IX86_BUILTIN_PSHUFD512,
+ IX86_BUILTIN_PSLLD512,
+ IX86_BUILTIN_PSLLDI512,
+ IX86_BUILTIN_PSLLQ512,
+ IX86_BUILTIN_PSLLQI512,
+ IX86_BUILTIN_PSLLVV16SI,
+ IX86_BUILTIN_PSLLVV8DI,
+ IX86_BUILTIN_PSRAD512,
+ IX86_BUILTIN_PSRADI512,
+ IX86_BUILTIN_PSRAQ512,
+ IX86_BUILTIN_PSRAQI512,
+ IX86_BUILTIN_PSRAVV16SI,
+ IX86_BUILTIN_PSRAVV8DI,
+ IX86_BUILTIN_PSRLD512,
+ IX86_BUILTIN_PSRLDI512,
+ IX86_BUILTIN_PSRLQ512,
+ IX86_BUILTIN_PSRLQI512,
+ IX86_BUILTIN_PSRLVV16SI,
+ IX86_BUILTIN_PSRLVV8DI,
+ IX86_BUILTIN_PSUBD512,
+ IX86_BUILTIN_PSUBQ512,
+ IX86_BUILTIN_PTESTMD512,
+ IX86_BUILTIN_PTESTMQ512,
+ IX86_BUILTIN_PTESTNMD512,
+ IX86_BUILTIN_PTESTNMQ512,
+ IX86_BUILTIN_PUNPCKHDQ512,
+ IX86_BUILTIN_PUNPCKHQDQ512,
+ IX86_BUILTIN_PUNPCKLDQ512,
+ IX86_BUILTIN_PUNPCKLQDQ512,
+ IX86_BUILTIN_PXORD512,
+ IX86_BUILTIN_PXORQ512,
+ IX86_BUILTIN_RCP14PD512,
+ IX86_BUILTIN_RCP14PS512,
+ IX86_BUILTIN_RCP14SD,
+ IX86_BUILTIN_RCP14SS,
+ IX86_BUILTIN_RNDSCALEPD,
+ IX86_BUILTIN_RNDSCALEPS,
+ IX86_BUILTIN_RNDSCALESD,
+ IX86_BUILTIN_RNDSCALESS,
+ IX86_BUILTIN_RSQRT14PD512,
+ IX86_BUILTIN_RSQRT14PS512,
+ IX86_BUILTIN_RSQRT14SD,
+ IX86_BUILTIN_RSQRT14SS,
+ IX86_BUILTIN_SCALEFPD512,
+ IX86_BUILTIN_SCALEFPS512,
+ IX86_BUILTIN_SCALEFSD,
+ IX86_BUILTIN_SCALEFSS,
+ IX86_BUILTIN_SHUFPD512,
+ IX86_BUILTIN_SHUFPS512,
+ IX86_BUILTIN_SHUF_F32x4,
+ IX86_BUILTIN_SHUF_F64x2,
+ IX86_BUILTIN_SHUF_I32x4,
+ IX86_BUILTIN_SHUF_I64x2,
+ IX86_BUILTIN_SQRTPD512,
+ IX86_BUILTIN_SQRTPD512_MASK,
+ IX86_BUILTIN_SQRTPS512_MASK,
+ IX86_BUILTIN_SQRTPS_NR512,
+ IX86_BUILTIN_SQRTSD_MASK,
+ IX86_BUILTIN_SQRTSS_MASK,
+ IX86_BUILTIN_STOREAPD512,
+ IX86_BUILTIN_STOREAPS512,
+ IX86_BUILTIN_STOREDQUDI512,
+ IX86_BUILTIN_STOREDQUSI512,
+ IX86_BUILTIN_STORESD,
+ IX86_BUILTIN_STORESS,
+ IX86_BUILTIN_STOREUPD512,
+ IX86_BUILTIN_STOREUPS512,
+ IX86_BUILTIN_SUBPD512,
+ IX86_BUILTIN_SUBPS512,
+ IX86_BUILTIN_SUBSD_MASK,
+ IX86_BUILTIN_SUBSS_MASK,
+ IX86_BUILTIN_UCMPD512,
+ IX86_BUILTIN_UCMPQ512,
+ IX86_BUILTIN_UNPCKHPD512,
+ IX86_BUILTIN_UNPCKHPS512,
+ IX86_BUILTIN_UNPCKLPD512,
+ IX86_BUILTIN_UNPCKLPS512,
+ IX86_BUILTIN_VCVTSD2SI32,
+ IX86_BUILTIN_VCVTSD2SI64,
+ IX86_BUILTIN_VCVTSD2USI32,
+ IX86_BUILTIN_VCVTSD2USI64,
+ IX86_BUILTIN_VCVTSS2SI32,
+ IX86_BUILTIN_VCVTSS2SI64,
+ IX86_BUILTIN_VCVTSS2USI32,
+ IX86_BUILTIN_VCVTSS2USI64,
+ IX86_BUILTIN_VCVTTSD2SI32,
+ IX86_BUILTIN_VCVTTSD2SI64,
+ IX86_BUILTIN_VCVTTSD2USI32,
+ IX86_BUILTIN_VCVTTSD2USI64,
+ IX86_BUILTIN_VCVTTSS2SI32,
+ IX86_BUILTIN_VCVTTSS2SI64,
+ IX86_BUILTIN_VCVTTSS2USI32,
+ IX86_BUILTIN_VCVTTSS2USI64,
+ IX86_BUILTIN_VFMADDPD512_MASK,
+ IX86_BUILTIN_VFMADDPD512_MASK3,
+ IX86_BUILTIN_VFMADDPD512_MASKZ,
+ IX86_BUILTIN_VFMADDPS512_MASK,
+ IX86_BUILTIN_VFMADDPS512_MASK3,
+ IX86_BUILTIN_VFMADDPS512_MASKZ,
+ IX86_BUILTIN_VFMADDSD3_MASK,
+ IX86_BUILTIN_VFMADDSD3_MASK3,
+ IX86_BUILTIN_VFMADDSD3_MASKZ,
+ IX86_BUILTIN_VFMADDSS3_MASK,
+ IX86_BUILTIN_VFMADDSS3_MASK3,
+ IX86_BUILTIN_VFMADDSS3_MASKZ,
+ IX86_BUILTIN_VFMADDSUBPD512_MASK,
+ IX86_BUILTIN_VFMADDSUBPD512_MASK3,
+ IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
+ IX86_BUILTIN_VFMADDSUBPS512_MASK,
+ IX86_BUILTIN_VFMADDSUBPS512_MASK3,
+ IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
+ IX86_BUILTIN_VFMSUBADDPD512_MASK3,
+ IX86_BUILTIN_VFMSUBADDPS512_MASK3,
+ IX86_BUILTIN_VFMSUBPD512_MASK3,
+ IX86_BUILTIN_VFMSUBPS512_MASK3,
+ IX86_BUILTIN_VFMSUBSD3_MASK3,
+ IX86_BUILTIN_VFMSUBSS3_MASK3,
+ IX86_BUILTIN_VFNMADDPD512_MASK,
+ IX86_BUILTIN_VFNMADDPS512_MASK,
+ IX86_BUILTIN_VFNMSUBPD512_MASK,
+ IX86_BUILTIN_VFNMSUBPD512_MASK3,
+ IX86_BUILTIN_VFNMSUBPS512_MASK,
+ IX86_BUILTIN_VFNMSUBPS512_MASK3,
+ IX86_BUILTIN_VPCLZCNTD512,
+ IX86_BUILTIN_VPCLZCNTQ512,
+ IX86_BUILTIN_VPCONFLICTD512,
+ IX86_BUILTIN_VPCONFLICTQ512,
+ IX86_BUILTIN_VPERMDF512,
+ IX86_BUILTIN_VPERMDI512,
+ IX86_BUILTIN_VPERMI2VARD512,
+ IX86_BUILTIN_VPERMI2VARPD512,
+ IX86_BUILTIN_VPERMI2VARPS512,
+ IX86_BUILTIN_VPERMI2VARQ512,
+ IX86_BUILTIN_VPERMILPD512,
+ IX86_BUILTIN_VPERMILPS512,
+ IX86_BUILTIN_VPERMILVARPD512,
+ IX86_BUILTIN_VPERMILVARPS512,
+ IX86_BUILTIN_VPERMT2VARD512,
+ IX86_BUILTIN_VPERMT2VARD512_MASKZ,
+ IX86_BUILTIN_VPERMT2VARPD512,
+ IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
+ IX86_BUILTIN_VPERMT2VARPS512,
+ IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
+ IX86_BUILTIN_VPERMT2VARQ512,
+ IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
+ IX86_BUILTIN_VPERMVARDF512,
+ IX86_BUILTIN_VPERMVARDI512,
+ IX86_BUILTIN_VPERMVARSF512,
+ IX86_BUILTIN_VPERMVARSI512,
+ IX86_BUILTIN_VTERNLOGD512_MASK,
+ IX86_BUILTIN_VTERNLOGD512_MASKZ,
+ IX86_BUILTIN_VTERNLOGQ512_MASK,
+ IX86_BUILTIN_VTERNLOGQ512_MASKZ,
+
+ /* Mask arithmetic operations */
+ IX86_BUILTIN_KAND16,
+ IX86_BUILTIN_KANDN16,
+ IX86_BUILTIN_KNOT16,
+ IX86_BUILTIN_KOR16,
+ IX86_BUILTIN_KORTESTC16,
+ IX86_BUILTIN_KORTESTZ16,
+ IX86_BUILTIN_KUNPCKBW,
+ IX86_BUILTIN_KXNOR16,
+ IX86_BUILTIN_KXOR16,
+
+ /* Alternate 4 and 8 element gather/scatter for the vectorizer
+ where all operands are 32-byte or 64-byte wide respectively. */
IX86_BUILTIN_GATHERALTSIV4DF,
IX86_BUILTIN_GATHERALTDIV8SF,
IX86_BUILTIN_GATHERALTSIV4DI,
IX86_BUILTIN_GATHERALTDIV8SI,
+ IX86_BUILTIN_GATHER3ALTDIV16SF,
+ IX86_BUILTIN_GATHER3ALTDIV16SI,
+ IX86_BUILTIN_GATHER3ALTSIV8DF,
+ IX86_BUILTIN_GATHER3ALTSIV8DI,
+ IX86_BUILTIN_GATHER3DIV16SF,
+ IX86_BUILTIN_GATHER3DIV16SI,
+ IX86_BUILTIN_GATHER3DIV8DF,
+ IX86_BUILTIN_GATHER3DIV8DI,
+ IX86_BUILTIN_GATHER3SIV16SF,
+ IX86_BUILTIN_GATHER3SIV16SI,
+ IX86_BUILTIN_GATHER3SIV8DF,
+ IX86_BUILTIN_GATHER3SIV8DI,
+ IX86_BUILTIN_SCATTERDIV16SF,
+ IX86_BUILTIN_SCATTERDIV16SI,
+ IX86_BUILTIN_SCATTERDIV8DF,
+ IX86_BUILTIN_SCATTERDIV8DI,
+ IX86_BUILTIN_SCATTERSIV16SF,
+ IX86_BUILTIN_SCATTERSIV16SI,
+ IX86_BUILTIN_SCATTERSIV8DF,
+ IX86_BUILTIN_SCATTERSIV8DI,
+
+ /* AVX512PF */
+ IX86_BUILTIN_GATHERPFDPS,
+ IX86_BUILTIN_GATHERPFQPS,
+ IX86_BUILTIN_SCATTERPFDPS,
+ IX86_BUILTIN_SCATTERPFQPS,
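+
+ /* AVX512ER */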
+ IX86_BUILTIN_EXP2PD,
+ IX86_BUILTIN_EXP2PS,
+ IX86_BUILTIN_RCP28PD,
+ IX86_BUILTIN_RCP28PS,
+ IX86_BUILTIN_RSQRT28PD,
+ IX86_BUILTIN_RSQRT28PS,
+
+ /* SHA builtins. */
+ IX86_BUILTIN_SHA1MSG1,
+ IX86_BUILTIN_SHA1MSG2,
+ IX86_BUILTIN_SHA1NEXTE,
+ IX86_BUILTIN_SHA1RNDS4,
+ IX86_BUILTIN_SHA256MSG1,
+ IX86_BUILTIN_SHA256MSG2,
+ IX86_BUILTIN_SHA256RNDS2,
/* TFmode support builtins. */
IX86_BUILTIN_INFQ,
@@ -26835,10 +27625,15 @@ enum ix86_builtins
IX86_BUILTIN_COPYSIGNQ,
/* Vectorizer support builtins. */
+ IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
IX86_BUILTIN_CPYSGNPS,
IX86_BUILTIN_CPYSGNPD,
IX86_BUILTIN_CPYSGNPS256,
+ IX86_BUILTIN_CPYSGNPS512,
IX86_BUILTIN_CPYSGNPD256,
+ IX86_BUILTIN_CPYSGNPD512,
+ IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
+ IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
/* FMA4 instructions. */
IX86_BUILTIN_VFMADDSS,
@@ -27302,13 +28097,13 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
@@ -27337,8 +28132,8 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
@@ -27365,6 +28160,43 @@ static const struct builtin_description bdesc_special_args[] =
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
+ /* AVX512F */
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadsv2df_mask, "__builtin_ia32_loadsd_mask", IX86_BUILTIN_LOADSD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadsv4sf_mask, "__builtin_ia32_loadss_mask", IX86_BUILTIN_LOADSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storesv2df_mask, "__builtin_ia32_storesd_mask", IX86_BUILTIN_STORESD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storesv4sf_mask, "__builtin_ia32_storess_mask", IX86_BUILTIN_STORESS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
+
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
@@ -27607,7 +28439,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
@@ -27955,7 +28787,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
@@ -28216,6 +29048,368 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+
+ /* AVX512F */
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movesv2df_mask, "__builtin_ia32_movesd_mask", IX86_BUILTIN_MOVESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movesv4sf_mask, "__builtin_ia32_movess_mask", IX86_BUILTIN_MOVESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df_mask, "__builtin_ia32_rcp14sd_mask", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14ss_mask", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
+
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
+
+ /* Mask arithmetic operations */
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandhi, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kiorhi, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxorhi, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
+
+ /* SHA */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
+};
+
+/* Builtins with rounding support. */
+static const struct builtin_description bdesc_round_args[] =
+{
+ /* AVX512F */
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_mask_round, "__builtin_ia32_addsd_mask", IX86_BUILTIN_ADDSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_mask_round, "__builtin_ia32_addss_mask", IX86_BUILTIN_ADDSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_mask_round, "__builtin_ia32_cvtsd2ss_mask", IX86_BUILTIN_CVTSD2SS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask", IX86_BUILTIN_CVTSS2SD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_mask_round, "__builtin_ia32_divsd_mask", IX86_BUILTIN_DIVSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_mask_round, "__builtin_ia32_divss_mask", IX86_BUILTIN_DIVSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_mask_round, "__builtin_ia32_getexpsd128_mask", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_mask_round, "__builtin_ia32_getexpss128_mask", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_mask_round, "__builtin_ia32_getmantsd_mask", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_mask_round, "__builtin_ia32_getmantss_mask", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_mask_round, "__builtin_ia32_maxsd_mask", IX86_BUILTIN_MAXSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_mask_round, "__builtin_ia32_maxss_mask", IX86_BUILTIN_MAXSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_mask_round, "__builtin_ia32_minsd_mask", IX86_BUILTIN_MINSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_mask_round, "__builtin_ia32_minss_mask", IX86_BUILTIN_MINSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_mask_round, "__builtin_ia32_mulsd_mask", IX86_BUILTIN_MULSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_mask_round, "__builtin_ia32_mulss_mask", IX86_BUILTIN_MULSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_mask_round, "__builtin_ia32_rndscalesd_mask", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_mask_round, "__builtin_ia32_rndscaless_mask", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_mask_round, "__builtin_ia32_scalefss_mask", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_mask_round, "__builtin_ia32_sqrtsd_mask", IX86_BUILTIN_SQRTSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_mask_round, "__builtin_ia32_sqrtss_mask", IX86_BUILTIN_SQRTSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_mask_round, "__builtin_ia32_subsd_mask", IX86_BUILTIN_SUBSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_mask_round, "__builtin_ia32_subss_mask", IX86_BUILTIN_SUBSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_mask_round, "__builtin_ia32_vfmaddsd3_mask", IX86_BUILTIN_VFMADDSD3_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_mask3_round, "__builtin_ia32_vfmaddsd3_mask3", IX86_BUILTIN_VFMADDSD3_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_maskz_round, "__builtin_ia32_vfmaddsd3_maskz", IX86_BUILTIN_VFMADDSD3_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_mask_round, "__builtin_ia32_vfmaddss3_mask", IX86_BUILTIN_VFMADDSS3_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_mask3_round, "__builtin_ia32_vfmaddss3_mask3", IX86_BUILTIN_VFMADDSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_maskz_round, "__builtin_ia32_vfmaddss3_maskz", IX86_BUILTIN_VFMADDSS3_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmsub_v2df_mask3_round, "__builtin_ia32_vfmsubsd3_mask3", IX86_BUILTIN_VFMSUBSD3_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmsub_v4sf_mask3_round, "__builtin_ia32_vfmsubss3_mask3", IX86_BUILTIN_VFMSUBSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
+
+ /* AVX512ER */
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
};
/* FMA4 and XOP. */
@@ -28662,6 +29856,18 @@ ix86_init_mmx_sse_builtins (void)
def_builtin_const (d->mask, d->name, ftype, d->code);
}
+ /* Add all builtins with rounding. */
+ for (i = 0, d = bdesc_round_args;
+ i < ARRAY_SIZE (bdesc_round_args);
+ i++, d++)
+ {
+ if (d->name == 0)
+ continue;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ def_builtin_const (d->mask, d->name, ftype, d->code);
+ }
+
/* pcmpestr[im] insns. */
for (i = 0, d = bdesc_pcmpestr;
i < ARRAY_SIZE (bdesc_pcmpestr);
@@ -28830,6 +30036,117 @@ ix86_init_mmx_sse_builtins (void)
V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
IX86_BUILTIN_GATHERALTDIV8SI);
+ /* AVX512F */
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
+ V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
+ IX86_BUILTIN_GATHER3SIV16SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
+ V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV8DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
+ V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV16SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
+ V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV8DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
+ V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
+ IX86_BUILTIN_GATHER3SIV16SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
+ V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
+ IX86_BUILTIN_GATHER3SIV8DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
+ V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV16SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
+ V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
+ IX86_BUILTIN_GATHER3DIV8DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
+ V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTSIV8DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
+ V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
+ IX86_BUILTIN_GATHER3ALTDIV16SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
+ V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
+ IX86_BUILTIN_GATHER3ALTSIV8DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
+ V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
+ IX86_BUILTIN_GATHER3ALTDIV16SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
+ VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
+ IX86_BUILTIN_SCATTERSIV16SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
+ VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
+ IX86_BUILTIN_SCATTERSIV8DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
+ VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
+ IX86_BUILTIN_SCATTERDIV16SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
+ VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
+ IX86_BUILTIN_SCATTERDIV8DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
+ VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
+ IX86_BUILTIN_SCATTERSIV16SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
+ VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
+ IX86_BUILTIN_SCATTERSIV8DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
+ VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
+ IX86_BUILTIN_SCATTERDIV16SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
+ VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
+ IX86_BUILTIN_SCATTERDIV8DI);
+
+ /* AVX512PF */
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
+ VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
+ IX86_BUILTIN_GATHERPFDPS);
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
+ VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
+ IX86_BUILTIN_GATHERPFQPS);
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
+ VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
+ IX86_BUILTIN_SCATTERPFDPS);
+ def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
+ VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
+ IX86_BUILTIN_SCATTERPFQPS);
+
+ /* SHA */
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
+ V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
+ def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
+ V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
+
/* RTM. */
def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
@@ -31162,12 +32479,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
rtx pat, real_target;
unsigned int i, nargs;
unsigned int nargs_constant = 0;
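+ /* Number of trailing merge-source/mask operands that follow the
+    immediate argument in the AVX512 masked builtins handled below. */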
+ unsigned int mask_pos = 0;
int num_memory = 0;
struct
{
rtx op;
enum machine_mode mode;
- } args[4];
+ } args[6];
bool last_arg_count = false;
enum insn_code icode = d->icode;
const struct insn_data_d *insn_p = &insn_data[icode];
@@ -31187,6 +32505,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
return ix86_expand_sse_round (d, exp, target);
case V4SI_FTYPE_V2DF_V2DF_ROUND:
case V8SI_FTYPE_V4DF_V4DF_ROUND:
+ case V16SI_FTYPE_V8DF_V8DF_ROUND:
return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
case INT_FTYPE_V8SF_V8SF_PTEST:
case INT_FTYPE_V4DI_V4DI_PTEST:
@@ -31265,6 +32584,32 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4DI_FTYPE_V8HI:
case V4DI_FTYPE_V4SI:
case V4DI_FTYPE_V2DI:
+ case HI_FTYPE_HI:
+ case UINT_FTYPE_V2DF:
+ case UINT_FTYPE_V4SF:
+ case UINT64_FTYPE_V2DF:
+ case UINT64_FTYPE_V4SF:
+ case V16QI_FTYPE_V8DI:
+ case V16HI_FTYPE_V16SI:
+ case V16SI_FTYPE_HI:
+ case V16SI_FTYPE_V16SI:
+ case V16SI_FTYPE_INT:
+ case V16SF_FTYPE_FLOAT:
+ case V16SF_FTYPE_V4SF:
+ case V16SF_FTYPE_V16SF:
+ case V8HI_FTYPE_V8DI:
+ case V8UHI_FTYPE_V8UHI:
+ case V8SI_FTYPE_V8DI:
+ case V8USI_FTYPE_V8USI:
+ case V8SF_FTYPE_V8DF:
+ case V8DI_FTYPE_QI:
+ case V8DI_FTYPE_INT64:
+ case V8DI_FTYPE_V4DI:
+ case V8DI_FTYPE_V8DI:
+ case V8DF_FTYPE_DOUBLE:
+ case V8DF_FTYPE_V4DF:
+ case V8DF_FTYPE_V8DF:
+ case V8DF_FTYPE_V8SI:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
@@ -31273,6 +32618,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case FLOAT128_FTYPE_FLOAT128_FLOAT128:
case V16QI_FTYPE_V16QI_V16QI:
case V16QI_FTYPE_V8HI_V8HI:
+ case V16SI_FTYPE_V16SI_V16SI:
+ case V16SF_FTYPE_V16SF_V16SF:
+ case V16SF_FTYPE_V16SF_V16SI:
case V8QI_FTYPE_V8QI_V8QI:
case V8QI_FTYPE_V4HI_V4HI:
case V8HI_FTYPE_V8HI_V8HI:
@@ -31280,6 +32628,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8HI_FTYPE_V4SI_V4SI:
case V8SF_FTYPE_V8SF_V8SF:
case V8SF_FTYPE_V8SF_V8SI:
+ case V8DI_FTYPE_V8DI_V8DI:
+ case V8DF_FTYPE_V8DF_V8DF:
+ case V8DF_FTYPE_V8DF_V8DI:
case V4SI_FTYPE_V4SI_V4SI:
case V4SI_FTYPE_V8HI_V8HI:
case V4SI_FTYPE_V4SF_V4SF:
@@ -31293,6 +32644,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4SF_FTYPE_V4SF_V4SI:
case V4SF_FTYPE_V4SF_V2SI:
case V4SF_FTYPE_V4SF_V2DF:
+ case V4SF_FTYPE_V4SF_UINT:
+ case V4SF_FTYPE_V4SF_UINT64:
case V4SF_FTYPE_V4SF_DI:
case V4SF_FTYPE_V4SF_SI:
case V2DI_FTYPE_V2DI_V2DI:
@@ -31309,6 +32662,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V2DF_V2DI:
case V2DF_FTYPE_V2DF_DI:
case V2DF_FTYPE_V2DF_SI:
+ case V2DF_FTYPE_V2DF_UINT:
+ case V2DF_FTYPE_V2DF_UINT64:
case V2SF_FTYPE_V2SF_V2SF:
case V1DI_FTYPE_V1DI_V1DI:
case V1DI_FTYPE_V8QI_V8QI:
@@ -31324,6 +32679,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4DI_FTYPE_V4DI_V4DI:
case V4DI_FTYPE_V8SI_V8SI:
case V4UDI_FTYPE_V8USI_V8USI:
+ case QI_FTYPE_V8DI_V8DI:
+ case HI_FTYPE_V16SI_V16SI:
if (comparison == UNKNOWN)
return ix86_expand_binop_builtin (icode, exp, target);
nargs = 2;
@@ -31361,6 +32718,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case UINT_FTYPE_UINT_UCHAR:
case UINT16_FTYPE_UINT16_INT:
case UINT8_FTYPE_UINT8_INT:
+ case HI_FTYPE_HI_HI:
+ case V16SI_FTYPE_V8DF_V8DF:
nargs = 2;
break;
case V2DI_FTYPE_V2DI_INT_CONVERT:
@@ -31375,12 +32734,16 @@ ix86_expand_args_builtin (const struct builtin_description *d,
break;
case V8HI_FTYPE_V8HI_INT:
case V8HI_FTYPE_V8SF_INT:
+ case V16HI_FTYPE_V16SF_INT:
case V8HI_FTYPE_V4SF_INT:
case V8SF_FTYPE_V8SF_INT:
+ case V4SF_FTYPE_V16SF_INT:
+ case V16SF_FTYPE_V16SF_INT:
case V4SI_FTYPE_V4SI_INT:
case V4SI_FTYPE_V8SI_INT:
case V4HI_FTYPE_V4HI_INT:
case V4DF_FTYPE_V4DF_INT:
+ case V4DF_FTYPE_V8DF_INT:
case V4SF_FTYPE_V4SF_INT:
case V4SF_FTYPE_V8SF_INT:
case V2DI_FTYPE_V2DI_INT:
@@ -31388,8 +32751,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DF_FTYPE_V4DF_INT:
case V16HI_FTYPE_V16HI_INT:
case V8SI_FTYPE_V8SI_INT:
+ case V16SI_FTYPE_V16SI_INT:
+ case V4SI_FTYPE_V16SI_INT:
case V4DI_FTYPE_V4DI_INT:
case V2DI_FTYPE_V4DI_INT:
+ case V4DI_FTYPE_V8DI_INT:
+ case HI_FTYPE_HI_INT:
nargs = 2;
nargs_constant = 1;
break;
@@ -31399,6 +32766,47 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4SF_FTYPE_V4SF_V4SF_V4SF:
case V2DF_FTYPE_V2DF_V2DF_V2DF:
case V32QI_FTYPE_V32QI_V32QI_V32QI:
+ case HI_FTYPE_V16SI_V16SI_HI:
+ case QI_FTYPE_V8DI_V8DI_QI:
+ case V16HI_FTYPE_V16SI_V16HI_HI:
+ case V16QI_FTYPE_V16SI_V16QI_HI:
+ case V16QI_FTYPE_V8DI_V16QI_QI:
+ case V16SF_FTYPE_V16SF_V16SF_HI:
+ case V16SF_FTYPE_V16SF_V16SF_V16SF:
+ case V16SF_FTYPE_V16SF_V16SI_V16SF:
+ case V16SF_FTYPE_V16SI_V16SF_HI:
+ case V16SF_FTYPE_V16SI_V16SF_V16SF:
+ case V16SF_FTYPE_V4SF_V16SF_HI:
+ case V16SI_FTYPE_SI_V16SI_HI:
+ case V16SI_FTYPE_V16HI_V16SI_HI:
+ case V16SI_FTYPE_V16QI_V16SI_HI:
+ case V16SI_FTYPE_V16SF_V16SI_HI:
+ case V16SI_FTYPE_V16SI_V16SI_HI:
+ case V16SI_FTYPE_V16SI_V16SI_V16SI:
+ case V16SI_FTYPE_V4SI_V16SI_HI:
+ case V2DI_FTYPE_V2DI_V2DI_V2DI:
+ case V4DI_FTYPE_V4DI_V4DI_V4DI:
+ case V8DF_FTYPE_V2DF_V8DF_QI:
+ case V8DF_FTYPE_V4DF_V8DF_QI:
+ case V8DF_FTYPE_V8DF_V8DF_QI:
+ case V8DF_FTYPE_V8DF_V8DF_V8DF:
+ case V8DF_FTYPE_V8DF_V8DI_V8DF:
+ case V8DF_FTYPE_V8DI_V8DF_V8DF:
+ case V8DF_FTYPE_V8SF_V8DF_QI:
+ case V8DF_FTYPE_V8SI_V8DF_QI:
+ case V8DI_FTYPE_DI_V8DI_QI:
+ case V8DI_FTYPE_V16QI_V8DI_QI:
+ case V8DI_FTYPE_V2DI_V8DI_QI:
+ case V8DI_FTYPE_V4DI_V8DI_QI:
+ case V8DI_FTYPE_V8DI_V8DI_QI:
+ case V8DI_FTYPE_V8DI_V8DI_V8DI:
+ case V8DI_FTYPE_V8HI_V8DI_QI:
+ case V8DI_FTYPE_V8SI_V8DI_QI:
+ case V8HI_FTYPE_V8DI_V8HI_QI:
+ case V8SF_FTYPE_V8DF_V8SF_QI:
+ case V8SI_FTYPE_V8DF_V8SI_QI:
+ case V8SI_FTYPE_V8DI_V8SI_QI:
+ case V4SI_FTYPE_V4SI_V4SI_V4SI:
nargs = 3;
break;
case V32QI_FTYPE_V32QI_V32QI_INT:
@@ -31412,11 +32820,20 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8SF_FTYPE_V8SF_V4SF_INT:
case V4SI_FTYPE_V4SI_V4SI_INT:
case V4DF_FTYPE_V4DF_V4DF_INT:
+ case V16SF_FTYPE_V16SF_V16SF_INT:
+ case V16SF_FTYPE_V16SF_V4SF_INT:
+ case V16SI_FTYPE_V16SI_V4SI_INT:
case V4DF_FTYPE_V4DF_V2DF_INT:
case V4SF_FTYPE_V4SF_V4SF_INT:
case V2DI_FTYPE_V2DI_V2DI_INT:
case V4DI_FTYPE_V4DI_V2DI_INT:
case V2DF_FTYPE_V2DF_V2DF_INT:
+ case QI_FTYPE_V8DI_V8DI_INT:
+ case QI_FTYPE_V8DF_V8DF_INT:
+ case QI_FTYPE_V2DF_V2DF_INT:
+ case QI_FTYPE_V4SF_V4SF_INT:
+ case HI_FTYPE_V16SI_V16SI_INT:
+ case HI_FTYPE_V16SF_V16SF_INT:
nargs = 3;
nargs_constant = 1;
break;
@@ -31439,13 +32856,38 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 3;
nargs_constant = 2;
break;
+ case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
+ case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
+ case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
+ case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
+ case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
+ case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
+ case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
+ case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
+ case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
+ case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
+ case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
+ case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
+ case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
+ case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
+ case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
+ case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
+ nargs = 4;
+ break;
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
nargs = 4;
nargs_constant = 1;
break;
+ case QI_FTYPE_V2DF_V2DF_INT_QI:
+ case QI_FTYPE_V4SF_V4SF_INT_QI:
+ nargs = 4;
+ mask_pos = 1;
+ nargs_constant = 1;
+ break;
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
nargs = 4;
nargs_constant = 2;
@@ -31454,6 +32896,51 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
nargs = 4;
break;
+ case QI_FTYPE_V8DI_V8DI_INT_QI:
+ case HI_FTYPE_V16SI_V16SI_INT_HI:
+ case QI_FTYPE_V8DF_V8DF_INT_QI:
+ case HI_FTYPE_V16SF_V16SF_INT_HI:
+ mask_pos = 1;
+ nargs = 4;
+ nargs_constant = 1;
+ break;
+ case V8DF_FTYPE_V8DF_INT_V8DF_QI:
+ case V16SF_FTYPE_V16SF_INT_V16SF_HI:
+ case V16HI_FTYPE_V16SF_INT_V16HI_HI:
+ case V16SI_FTYPE_V16SI_INT_V16SI_HI:
+ case V4SI_FTYPE_V16SI_INT_V4SI_QI:
+ case V4DI_FTYPE_V8DI_INT_V4DI_QI:
+ case V4DF_FTYPE_V8DF_INT_V4DF_QI:
+ case V4SF_FTYPE_V16SF_INT_V4SF_QI:
+ case V8DI_FTYPE_V8DI_INT_V8DI_QI:
+ nargs = 4;
+ mask_pos = 2;
+ nargs_constant = 1;
+ break;
+ case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
+ case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
+ case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
+ case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
+ case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
+ case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
+ case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
+ case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
+ case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
+ case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
+ nargs = 5;
+ mask_pos = 2;
+ nargs_constant = 1;
+ break;
+ case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
+ case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
+ case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
+ nargs = 5;
+ mask_pos = 1;
+ nargs_constant = 1;
+ break;
+
default:
gcc_unreachable ();
}
@@ -31500,7 +32987,8 @@ ix86_expand_args_builtin (const struct builtin_description *d,
op = copy_to_reg (op);
}
}
- else if ((nargs - i) <= nargs_constant)
+ else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
+ (!mask_pos && (nargs - i) <= nargs_constant))
{
if (!match)
switch (icode)
@@ -31510,6 +32998,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
error ("the last argument must be an 1-bit immediate");
return const0_rtx;
+ case CODE_FOR_avx512f_cmpv8di3_mask:
+ case CODE_FOR_avx512f_cmpv16si3_mask:
+ case CODE_FOR_avx512f_ucmpv8di3_mask:
+ case CODE_FOR_avx512f_ucmpv16si3_mask:
+ error ("the last argument must be a 3-bit immediate");
+ return const0_rtx;
+
case CODE_FOR_sse4_1_roundsd:
case CODE_FOR_sse4_1_roundss:
@@ -31526,15 +33021,24 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_sse4_1_blendps:
case CODE_FOR_avx_blendpd256:
case CODE_FOR_avx_vpermilv4df:
+ case CODE_FOR_avx512f_getmantv8df_mask:
+ case CODE_FOR_avx512f_getmantv16sf_mask:
+ case CODE_FOR_avx512f_getmantv2df_mask:
+ case CODE_FOR_avx512f_getmantv4sf_mask:
error ("the last argument must be a 4-bit immediate");
return const0_rtx;
+ case CODE_FOR_sha1rnds4:
case CODE_FOR_sse4_1_blendpd:
case CODE_FOR_avx_vpermilv2df:
case CODE_FOR_xop_vpermil2v2df3:
case CODE_FOR_xop_vpermil2v4sf3:
case CODE_FOR_xop_vpermil2v4df3:
case CODE_FOR_xop_vpermil2v8sf3:
+ case CODE_FOR_avx512f_vinsertf32x4_mask:
+ case CODE_FOR_avx512f_vinserti32x4_mask:
+ case CODE_FOR_avx512f_vextractf32x4_mask:
+ case CODE_FOR_avx512f_vextracti32x4_mask:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
@@ -31544,6 +33048,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx_vinsertf128v4df:
case CODE_FOR_avx_vinsertf128v8sf:
case CODE_FOR_avx_vinsertf128v8si:
+ case CODE_FOR_avx512f_vinsertf64x4_mask:
+ case CODE_FOR_avx512f_vinserti64x4_mask:
+ case CODE_FOR_avx512f_vextractf64x4_mask:
+ case CODE_FOR_avx512f_vextracti64x4_mask:
error ("the last argument must be a 1-bit immediate");
return const0_rtx;
@@ -31553,14 +33061,19 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx_cmpv4sf3:
case CODE_FOR_avx_cmpv4df3:
case CODE_FOR_avx_cmpv8sf3:
+ case CODE_FOR_avx512f_cmpv8df3_mask:
+ case CODE_FOR_avx512f_cmpv16sf3_mask:
+ case CODE_FOR_avx512f_vmcmpv2df3_mask:
+ case CODE_FOR_avx512f_vmcmpv4sf3_mask:
error ("the last argument must be a 5-bit immediate");
return const0_rtx;
- default:
+ default:
switch (nargs_constant)
{
case 2:
- if ((nargs - i) == nargs_constant)
+ if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
+ (!mask_pos && (nargs - i) == nargs_constant))
{
error ("the next to last argument must be an 8-bit immediate");
break;
@@ -31616,6 +33129,14 @@ ix86_expand_args_builtin (const struct builtin_description *d,
pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
args[2].op, args[3].op);
break;
+ case 5:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op, args[3].op, args[4].op);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op, args[3].op, args[4].op,
+ args[5].op);
+ break;
default:
gcc_unreachable ();
}
@@ -31627,6 +33148,404 @@ ix86_expand_args_builtin (const struct builtin_description *d,
return target;
}
+/* Transform a pattern of the following layout:
+ (parallel [
+ (set (A B))
+ (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
+ ])
+ into:
+ (set (A B))
+
+ Or:
+ (parallel [ A B
+ ...
+ (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
+ ...
+ ])
+ into:
+ (parallel [ A B ... ]) */
+
+static rtx
+ix86_erase_embedded_rounding_1 (rtx pat, bool *removed)
+{
+ gcc_assert (GET_CODE (pat) == PARALLEL);
+
+ if (XVECLEN (pat, 0) == 2)
+ {
+ rtx p0 = XVECEXP (pat, 0, 0);
+ rtx p1 = XVECEXP (pat, 0, 1);
+
+ gcc_assert (GET_CODE (p0) == SET
+ && GET_CODE (p1) == UNSPEC
+ && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
+
+ *removed = true;
+ return p0;
+ }
+ else
+ {
+ rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
+ int i = 0;
+ int j = 0;
+
+ *removed = false;
+
+ for (; i < XVECLEN (pat, 0); ++i)
+ {
+ rtx elem = XVECEXP (pat, 0, i);
+ if (GET_CODE (elem) != UNSPEC
+ || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
+ res [j++] = elem;
+ else
+ *removed = true;
+ }
+
+ /* No more than one occurrence was removed. */
+ gcc_assert (j >= XVECLEN (pat, 0) - 1);
+
+ return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
+ }
+}
+
+/* Walk through an insn sequence or a pattern and erase embedded-rounding
+ mentions.  The main transformation is performed in
+ ix86_erase_embedded_rounding_1. */
+static rtx
+ix86_erase_embedded_rounding (rtx pat)
+{
+ gcc_assert (GET_CODE (pat) == INSN
+ || GET_CODE (pat) == PARALLEL);
+
+ rtx res;
+ bool was_removed = false;
+
+ if (GET_CODE (pat) == INSN)
+ {
+ rtx insn = pat;
+ start_sequence ();
+ for (; insn; insn = NEXT_INSN (insn))
+ {
+ rtx p = PATTERN (insn);
+ if (GET_CODE (p) == PARALLEL)
+ {
+ bool tmp = false;
+ p = ix86_erase_embedded_rounding_1 (p, &tmp);
+ was_removed |= tmp;
+ }
+ emit_insn (p);
+ }
+ res = get_insns ();
+ end_sequence ();
+ }
+ else
+ res = ix86_erase_embedded_rounding_1 (pat, &was_removed);
+
+ /* Assert that at least one occurrence of rounding was removed. */
+ gcc_assert (was_removed);
+ return res;
+}
+
+/* Subroutine of ix86_expand_round_builtin to take care of comi insns
+ with rounding. */
+static rtx
+ix86_expand_sse_comi_round (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat, set_dst;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ tree arg3 = CALL_EXPR_ARG (exp, 3);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ enum insn_code icode = d->icode;
+ const struct insn_data_d *insn_p = &insn_data[icode];
+ enum machine_mode mode0 = insn_p->operand[0].mode;
+ enum machine_mode mode1 = insn_p->operand[1].mode;
+ enum rtx_code comparison = UNEQ;
+ bool need_ucomi = false;
+
+ /* See avxintrin.h for values. */
+ enum rtx_code comi_comparisons[32] =
+ {
+ UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
+ UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
+ UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
+ };
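+ /* Whether the quiet (non-signalling) UCOMI form is needed for each
+    predicate value above. */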
+ bool need_ucomi_values[32] =
+ {
+ true, false, false, true, true, false, false, true,
+ true, false, false, true, true, false, false, true,
+ false, true, true, false, false, true, true, false,
+ false, true, true, false, false, true, true, false
+ };
+
+ if (!CONST_INT_P (op2))
+ {
+ error ("third argument must be comparison constant.");
+ return const0_rtx;
+ }
+ if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
+ {
+ error ("incorect comparison mode.");
+ return const0_rtx;
+ }
+
+ if (!insn_p->operand[2].predicate (op3, SImode))
+ {
+ error ("incorrect rounding operand.");
+ return const0_rtx;
+ }
+
+ comparison = comi_comparisons[INTVAL (op2)];
+ need_ucomi = need_ucomi_values[INTVAL (op2)];
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
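+ /* Build the boolean result in the low byte of a zeroed SImode pseudo;
+    the byte is set from the flags produced by the [U]COMI pattern. */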
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !insn_p->operand[0].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !insn_p->operand[1].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ if (need_ucomi)
+ icode = icode == CODE_FOR_sse_comi_round
+ ? CODE_FOR_sse_ucomi_round
+ : CODE_FOR_sse2_ucomi_round;
+
+ pat = GEN_FCN (icode) (op0, op1, op3);
+ if (! pat)
+ return 0;
+
+ /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
+ if (INTVAL (op3) == NO_ROUND)
+ {
+ pat = ix86_erase_embedded_rounding (pat);
+ if (! pat)
+ return 0;
+
+ set_dst = SET_DEST (pat);
+ }
+ else
+ {
+ gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
+ set_dst = SET_DEST (XVECEXP (pat, 0, 0));
+ }
+
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ set_dst,
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
+static rtx
+ix86_expand_round_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ unsigned int i, nargs;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[6];
+ enum insn_code icode = d->icode;
+ const struct insn_data_d *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ unsigned int nargs_constant = 0;
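+ /* Set when the rounding immediate is NO_ROUND, so the redundant
+    embedded-rounding unspec must be erased from the generated pattern. */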
+ unsigned int redundant_embed_rnd = 0;
+
+ switch ((enum ix86_builtin_func_type) d->flag)
+ {
+ case UINT64_FTYPE_V2DF_INT:
+ case UINT64_FTYPE_V4SF_INT:
+ case UINT_FTYPE_V2DF_INT:
+ case UINT_FTYPE_V4SF_INT:
+ case INT64_FTYPE_V2DF_INT:
+ case INT64_FTYPE_V4SF_INT:
+ case INT_FTYPE_V2DF_INT:
+ case INT_FTYPE_V4SF_INT:
+ nargs = 2;
+ break;
+ case V4SF_FTYPE_V4SF_UINT_INT:
+ case V4SF_FTYPE_V4SF_UINT64_INT:
+ case V2DF_FTYPE_V2DF_UINT64_INT:
+ case V4SF_FTYPE_V4SF_INT_INT:
+ case V4SF_FTYPE_V4SF_INT64_INT:
+ case V2DF_FTYPE_V2DF_INT64_INT:
+ nargs = 3;
+ break;
+ case V8SF_FTYPE_V8DF_V8SF_QI_INT:
+ case V8DF_FTYPE_V8DF_V8DF_QI_INT:
+ case V8SI_FTYPE_V8DF_V8SI_QI_INT:
+ case V16SF_FTYPE_V16SF_V16SF_HI_INT:
+ case V16SF_FTYPE_V16SI_V16SF_HI_INT:
+ case V16SI_FTYPE_V16SF_V16SI_HI_INT:
+ case V8DF_FTYPE_V8SF_V8DF_QI_INT:
+ case V16SF_FTYPE_V16HI_V16SF_HI_INT:
+ nargs = 4;
+ break;
+ case INT_FTYPE_V4SF_V4SF_INT_INT:
+ case INT_FTYPE_V2DF_V2DF_INT_INT:
+ return ix86_expand_sse_comi_round (d, exp, target);
+ case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
+ case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
+ case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
+ case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
+ case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
+ nargs = 5;
+ break;
+ case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
+ case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
+ nargs_constant = 4;
+ nargs = 5;
+ break;
+ case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
+ case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
+ case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
+ case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
+ nargs_constant = 3;
+ nargs = 5;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
+ nargs = 6;
+ nargs_constant = 4;
+ break;
+ case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
+ case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
+ nargs = 6;
+ nargs_constant = 3;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ gcc_assert (nargs <= ARRAY_SIZE (args));
+
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || !insn_p->operand[0].predicate (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ for (i = 0; i < nargs; i++)
+ {
+ tree arg = CALL_EXPR_ARG (exp, i);
+ rtx op = expand_normal (arg);
+ enum machine_mode mode = insn_p->operand[i + 1].mode;
+ bool match = insn_p->operand[i + 1].predicate (op, mode);
+
+ if (i == nargs - nargs_constant)
+ {
+ if (!match)
+ {
+ switch (icode)
+ {
+ case CODE_FOR_avx512f_getmantv8df_mask_round:
+ case CODE_FOR_avx512f_getmantv16sf_mask_round:
+ case CODE_FOR_avx512f_getmantv2df_mask_round:
+ case CODE_FOR_avx512f_getmantv4sf_mask_round:
+ error ("the immediate argument must be 4-bit immediate.");
+ return const0_rtx;
+ case CODE_FOR_avx512f_cmpv8df3_mask_round:
+ case CODE_FOR_avx512f_cmpv16sf3_mask_round:
+ case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
+ case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
+ error ("the immediate argument must be 5-bit immediate.");
+ return const0_rtx;
+ default:
+ error ("the immediate argument must be 8-bit immediate.");
+ return const0_rtx;
+ }
+ }
+ }
+ else if (i == nargs - 1)
+ {
+ if (!insn_p->operand[nargs].predicate (op, SImode))
+ {
+ error ("incorrect rounding operand.");
+ return const0_rtx;
+ }
+
+ /* If there is no rounding, use the normal version of the pattern. */
+ if (INTVAL (op) == NO_ROUND)
+ redundant_embed_rnd = 1;
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
+
+ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
+ {
+ if (optimize || !match)
+ op = copy_to_mode_reg (mode, op);
+ }
+ else
+ {
+ op = copy_to_reg (op);
+ op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
+ }
+ }
+
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (target, args[0].op);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ args[2].op);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ args[2].op, args[3].op);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ args[2].op, args[3].op, args[4].op);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ args[2].op, args[3].op, args[4].op,
+ args[5].op);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!pat)
+ return 0;
+
+ if (redundant_embed_rnd)
+ pat = ix86_erase_embedded_rounding (pat);
+
+ emit_insn (pat);
+ return target;
+}
+
/* Subroutine of ix86_expand_builtin to take care of special insns
with variable number of operands. */
@@ -31679,17 +33598,24 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case V4DF_FTYPE_PCDOUBLE:
case V2DF_FTYPE_PCDOUBLE:
case VOID_FTYPE_PVOID:
+ case V16SI_FTYPE_PV4SI:
+ case V16SF_FTYPE_PV4SF:
+ case V8DI_FTYPE_PV4DI:
+ case V8DF_FTYPE_PV4DF:
nargs = 1;
klass = load;
memory = 0;
break;
case VOID_FTYPE_PV2SF_V4SF:
+ case VOID_FTYPE_PV8DI_V8DI:
case VOID_FTYPE_PV4DI_V4DI:
case VOID_FTYPE_PV2DI_V2DI:
case VOID_FTYPE_PCHAR_V32QI:
case VOID_FTYPE_PCHAR_V16QI:
+ case VOID_FTYPE_PFLOAT_V16SF:
case VOID_FTYPE_PFLOAT_V8SF:
case VOID_FTYPE_PFLOAT_V4SF:
+ case VOID_FTYPE_PDOUBLE_V8DF:
case VOID_FTYPE_PDOUBLE_V4DF:
case VOID_FTYPE_PDOUBLE_V2DF:
case VOID_FTYPE_PLONGLONG_LONGLONG:
@@ -31726,11 +33652,27 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PV4DI_V4DI_V4DI:
case VOID_FTYPE_PV4SI_V4SI_V4SI:
case VOID_FTYPE_PV2DI_V2DI_V2DI:
+ case VOID_FTYPE_PV8DF_V8DF_QI:
+ case VOID_FTYPE_PV16SF_V16SF_HI:
+ case VOID_FTYPE_PV8DI_V8DI_QI:
+ case VOID_FTYPE_PV16SI_V16SI_HI:
+ case VOID_FTYPE_PDOUBLE_V2DF_QI:
+ case VOID_FTYPE_PFLOAT_V4SF_QI:
nargs = 2;
klass = store;
/* Reserve memory operand for target. */
memory = ARRAY_SIZE (args);
break;
+ case V16SF_FTYPE_PCV16SF_V16SF_HI:
+ case V16SI_FTYPE_PCV16SI_V16SI_HI:
+ case V8DF_FTYPE_PCV8DF_V8DF_QI:
+ case V8DI_FTYPE_PCV8DI_V8DI_QI:
+ case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
+ case V4SF_FTYPE_PCFLOAT_V4SF_QI:
+ nargs = 3;
+ klass = load;
+ memory = 0;
+ break;
case VOID_FTYPE_UINT_UINT_UINT:
case VOID_FTYPE_UINT64_UINT_UINT:
case UCHAR_FTYPE_UINT_UINT_UINT:
@@ -31809,9 +33751,13 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
if (VECTOR_MODE_P (mode))
op = safe_vector_operand (op, mode);
- gcc_assert (GET_MODE (op) == mode
- || GET_MODE (op) == VOIDmode);
- op = copy_to_mode_reg (mode, op);
+ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
+ op = copy_to_mode_reg (mode, op);
+ else
+ {
+ op = copy_to_reg (op);
+ op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
+ }
}
}
@@ -32516,6 +34462,38 @@ addcarryx:
emit_insn (gen_rtx_SET (VOIDmode, target, pat));
return target;
+ case IX86_BUILTIN_KORTESTC16:
+ icode = CODE_FOR_kortestchi;
+ mode0 = HImode;
+ mode1 = CCCmode;
+ goto kortest;
+
+ case IX86_BUILTIN_KORTESTZ16:
+ icode = CODE_FOR_kortestzhi;
+ mode0 = HImode;
+ mode1 = CCZmode;
+
+ kortest:
+ arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
+ arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+
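+ /* The mask arguments arrive as integers; copy them into registers and
+    view them in HImode, as the kortest pattern expects. */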
+ op0 = copy_to_reg (op0);
+ op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
+ op1 = copy_to_reg (op1);
+ op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
+
+ target = gen_reg_rtx (QImode);
+ emit_insn (gen_rtx_SET (VOIDmode, target, const0_rtx));
+
+ /* Emit kortest. */
+ emit_insn (GEN_FCN (icode) (op0, op1));
+ /* And use setcc to return result from flags. */
+ ix86_expand_setcc (target, EQ,
+ gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
+ return target;
+
case IX86_BUILTIN_GATHERSIV2DF:
icode = CODE_FOR_avx2_gathersiv2df;
goto gather_gen;
@@ -32576,8 +34554,83 @@ addcarryx:
case IX86_BUILTIN_GATHERALTDIV8SI:
icode = CODE_FOR_avx2_gatherdiv8si;
goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV16SF:
+ icode = CODE_FOR_avx512f_gathersiv16sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV8DF:
+ icode = CODE_FOR_avx512f_gathersiv8df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV16SF:
+ icode = CODE_FOR_avx512f_gatherdiv16sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV8DF:
+ icode = CODE_FOR_avx512f_gatherdiv8df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV16SI:
+ icode = CODE_FOR_avx512f_gathersiv16si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3SIV8DI:
+ icode = CODE_FOR_avx512f_gathersiv8di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV16SI:
+ icode = CODE_FOR_avx512f_gatherdiv16si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3DIV8DI:
+ icode = CODE_FOR_avx512f_gatherdiv8di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTSIV8DF:
+ icode = CODE_FOR_avx512f_gathersiv8df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTDIV16SF:
+ icode = CODE_FOR_avx512f_gatherdiv16sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTSIV8DI:
+ icode = CODE_FOR_avx512f_gathersiv8di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHER3ALTDIV16SI:
+ icode = CODE_FOR_avx512f_gatherdiv16si;
+ goto gather_gen;
+ case IX86_BUILTIN_SCATTERSIV16SF:
+ icode = CODE_FOR_avx512f_scattersiv16sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV8DF:
+ icode = CODE_FOR_avx512f_scattersiv8df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV16SF:
+ icode = CODE_FOR_avx512f_scatterdiv16sf;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV8DF:
+ icode = CODE_FOR_avx512f_scatterdiv8df;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV16SI:
+ icode = CODE_FOR_avx512f_scattersiv16si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERSIV8DI:
+ icode = CODE_FOR_avx512f_scattersiv8di;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV16SI:
+ icode = CODE_FOR_avx512f_scatterdiv16si;
+ goto scatter_gen;
+ case IX86_BUILTIN_SCATTERDIV8DI:
+ icode = CODE_FOR_avx512f_scatterdiv8di;
+ goto scatter_gen;
+ case IX86_BUILTIN_GATHERPFDPS:
+ icode = CODE_FOR_avx512pf_gatherpfv16si;
+ goto vec_prefetch_gen;
+ case IX86_BUILTIN_GATHERPFQPS:
+ icode = CODE_FOR_avx512pf_gatherpfv8di;
+ goto vec_prefetch_gen;
+ case IX86_BUILTIN_SCATTERPFDPS:
+ icode = CODE_FOR_avx512pf_scatterpfv16si;
+ goto vec_prefetch_gen;
+ case IX86_BUILTIN_SCATTERPFQPS:
+ icode = CODE_FOR_avx512pf_scatterpfv8di;
+ goto vec_prefetch_gen;
gather_gen:
+ rtx half;
+ rtx (*gen) (rtx, rtx);
+
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
arg2 = CALL_EXPR_ARG (exp, 2);
@@ -32600,20 +34653,46 @@ addcarryx:
else
subtarget = target;
- if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
- || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
+ switch (fcode)
{
- rtx half = gen_reg_rtx (V4SImode);
+ case IX86_BUILTIN_GATHER3ALTSIV8DF:
+ case IX86_BUILTIN_GATHER3ALTSIV8DI:
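+ /* These variants gather eight 64-bit elements with a V16SI index,
+    but only the low eight indices are used, so extract the low
+    V8SI half. */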
+ half = gen_reg_rtx (V8SImode);
+ if (!nonimmediate_operand (op2, V16SImode))
+ op2 = copy_to_mode_reg (V16SImode, op2);
+ emit_insn (gen_vec_extract_lo_v16si (half, op2));
+ op2 = half;
+ break;
+ case IX86_BUILTIN_GATHERALTSIV4DF:
+ case IX86_BUILTIN_GATHERALTSIV4DI:
+ half = gen_reg_rtx (V4SImode);
if (!nonimmediate_operand (op2, V8SImode))
op2 = copy_to_mode_reg (V8SImode, op2);
emit_insn (gen_vec_extract_lo_v8si (half, op2));
op2 = half;
- }
- else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
- || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
- {
- rtx (*gen) (rtx, rtx);
- rtx half = gen_reg_rtx (mode0);
+ break;
+ case IX86_BUILTIN_GATHER3ALTDIV16SF:
+ case IX86_BUILTIN_GATHER3ALTDIV16SI:
+ half = gen_reg_rtx (mode0);
+ if (mode0 == V8SFmode)
+ gen = gen_vec_extract_lo_v16sf;
+ else
+ gen = gen_vec_extract_lo_v16si;
+ if (!nonimmediate_operand (op0, GET_MODE (op0)))
+ op0 = copy_to_mode_reg (GET_MODE (op0), op0);
+ emit_insn (gen (half, op0));
+ op0 = half;
+ if (GET_MODE (op3) != VOIDmode)
+ {
+ if (!nonimmediate_operand (op3, GET_MODE (op3)))
+ op3 = copy_to_mode_reg (GET_MODE (op3), op3);
+ emit_insn (gen (half, op3));
+ op3 = half;
+ }
+ break;
+ case IX86_BUILTIN_GATHERALTDIV8SF:
+ case IX86_BUILTIN_GATHERALTDIV8SI:
+ half = gen_reg_rtx (mode0);
if (mode0 == V4SFmode)
gen = gen_vec_extract_lo_v8sf;
else
@@ -32622,10 +34701,16 @@ addcarryx:
op0 = copy_to_mode_reg (GET_MODE (op0), op0);
emit_insn (gen (half, op0));
op0 = half;
- if (!nonimmediate_operand (op3, GET_MODE (op3)))
- op3 = copy_to_mode_reg (GET_MODE (op3), op3);
- emit_insn (gen (half, op3));
- op3 = half;
+ if (GET_MODE (op3) != VOIDmode)
+ {
+ if (!nonimmediate_operand (op3, GET_MODE (op3)))
+ op3 = copy_to_mode_reg (GET_MODE (op3), op3);
+ emit_insn (gen (half, op3));
+ op3 = half;
+ }
+ break;
+ default:
+ break;
}
/* Force memory operand only with base register here. But we
@@ -32639,8 +34724,16 @@ addcarryx:
op1 = copy_to_mode_reg (Pmode, op1);
if (!insn_data[icode].operand[3].predicate (op2, mode2))
op2 = copy_to_mode_reg (mode2, op2);
- if (!insn_data[icode].operand[4].predicate (op3, mode3))
- op3 = copy_to_mode_reg (mode3, op3);
+ if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
+ {
+ if (!insn_data[icode].operand[4].predicate (op3, mode3))
+ op3 = copy_to_mode_reg (mode3, op3);
+ }
+ else
+ {
+ op3 = copy_to_reg (op3);
+ op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
+ }
if (!insn_data[icode].operand[5].predicate (op4, mode4))
{
error ("last argument must be scale 1, 2, 4, 8");
@@ -32653,7 +34746,12 @@ addcarryx:
previous contents. */
if (optimize)
{
- if (TREE_CODE (arg3) == VECTOR_CST)
+ if (TREE_CODE (arg3) == INTEGER_CST)
+ {
+ if (integer_all_onesp (arg3))
+ op0 = pc_rtx;
+ }
+ else if (TREE_CODE (arg3) == VECTOR_CST)
{
unsigned int negative = 0;
for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
@@ -32669,7 +34767,8 @@ addcarryx:
if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
op0 = pc_rtx;
}
- else if (TREE_CODE (arg3) == SSA_NAME)
+ else if (TREE_CODE (arg3) == SSA_NAME
+ && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
{
/* Recognize also when mask is like:
__v2df src = _mm_setzero_pd ();
@@ -32714,22 +34813,150 @@ addcarryx:
return const0_rtx;
emit_insn (pat);
- if (fcode == IX86_BUILTIN_GATHERDIV8SF
- || fcode == IX86_BUILTIN_GATHERDIV8SI)
+ switch (fcode)
{
- enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
- ? V4SFmode : V4SImode;
+ case IX86_BUILTIN_GATHER3DIV16SF:
if (target == NULL_RTX)
- target = gen_reg_rtx (tmode);
- if (tmode == V4SFmode)
- emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
- else
- emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
+ target = gen_reg_rtx (V8SFmode);
+ emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
+ break;
+ case IX86_BUILTIN_GATHER3DIV16SI:
+ if (target == NULL_RTX)
+ target = gen_reg_rtx (V8SImode);
+ emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
+ break;
+ case IX86_BUILTIN_GATHERDIV8SF:
+ if (target == NULL_RTX)
+ target = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
+ break;
+ case IX86_BUILTIN_GATHERDIV8SI:
+ if (target == NULL_RTX)
+ target = gen_reg_rtx (V4SImode);
+ emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
+ break;
+ default:
+ target = subtarget;
+ break;
+ }
+ return target;
+
+ scatter_gen:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ arg3 = CALL_EXPR_ARG (exp, 3);
+ arg4 = CALL_EXPR_ARG (exp, 4);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ op3 = expand_normal (arg3);
+ op4 = expand_normal (arg4);
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ mode4 = insn_data[icode].operand[4].mode;
+
+ /* Force memory operand only with base register here. But we
+ don't want to do it on memory operand for other builtin
+ functions. */
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+
+ if (!insn_data[icode].operand[0].predicate (op0, Pmode))
+ op0 = copy_to_mode_reg (Pmode, op0);
+
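+ /* The mask argument (operand 1) may arrive in a plain integer mode;
+    if it does not already have the mode the pattern expects, copy it
+    to a register and view it through a subreg of the mask mode. */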
+ if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
+ {
+ if (!insn_data[icode].operand[1].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
}
else
- target = subtarget;
+ {
+ op1 = copy_to_reg (op1);
+ op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
+ }
- return target;
+ if (!insn_data[icode].operand[2].predicate (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+
+ if (!insn_data[icode].operand[3].predicate (op3, mode3))
+ op3 = copy_to_mode_reg (mode3, op3);
+
+ if (!insn_data[icode].operand[4].predicate (op4, mode4))
+ {
+ error ("last argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
+ }
+
+ pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
+ if (! pat)
+ return const0_rtx;
+
+ emit_insn (pat);
+ return 0;
+
+ vec_prefetch_gen:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ arg3 = CALL_EXPR_ARG (exp, 3);
+ arg4 = CALL_EXPR_ARG (exp, 4);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ op3 = expand_normal (arg3);
+ op4 = expand_normal (arg4);
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ mode4 = insn_data[icode].operand[4].mode;
+
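+ /* A constm1_rtx mask (all elements enabled) is left untouched; any
+    other mask value is forced into a register of the expected mask
+    mode. */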
+ if (GET_MODE (op0) == mode0
+ || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
+ {
+ if (!insn_data[icode].operand[0].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ }
+ else if (op0 != constm1_rtx)
+ {
+ op0 = copy_to_reg (op0);
+ op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
+ }
+
+ if (!insn_data[icode].operand[1].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ /* Force memory operand only with base register here. But we
+ don't want to do it on memory operand for other builtin
+ functions. */
+ if (GET_MODE (op2) != Pmode)
+ op2 = convert_to_mode (Pmode, op2, 1);
+ op2 = force_reg (Pmode, op2);
+
+ if (!insn_data[icode].operand[2].predicate (op2, Pmode))
+ op2 = copy_to_mode_reg (Pmode, op2);
+
+ if (!insn_data[icode].operand[3].predicate (op3, mode3))
+ {
+ error ("forth argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
+ }
+
+ if (!insn_data[icode].operand[4].predicate (op4, mode4))
+ {
+ error ("last argument must be hint 0 or 1");
+ return const0_rtx;
+ }
+
+ pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
+ if (! pat)
+ return const0_rtx;
+
+ emit_insn (pat);
+
+ return 0;
case IX86_BUILTIN_XABORT:
icode = CODE_FOR_xabort;
@@ -32773,6 +35000,10 @@ addcarryx:
if (d->code == fcode)
return ix86_expand_sse_comi (d, exp, target);
+ for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_round_builtin (d, exp, target);
+
for (i = 0, d = bdesc_pcmpestr;
i < ARRAY_SIZE (bdesc_pcmpestr);
i++, d++)
@@ -32825,6 +35056,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_SQRTPD];
else if (out_n == 4 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_SQRTPD256];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_SQRTPD512];
}
break;
@@ -32835,6 +35068,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
else if (out_n == 8 && in_n == 8)
return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
+ else if (out_n == 16 && in_n == 16)
+ return ix86_builtins[IX86_BUILTIN_SQRTPS_NR512];
}
break;
@@ -32851,6 +35086,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
else if (out_n == 8 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
+ else if (out_n == 16 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512];
}
break;
@@ -32883,6 +35120,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
else if (out_n == 8 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
+ else if (out_n == 16 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512];
}
break;
@@ -32939,6 +35178,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
else if (out_n == 8 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
+ else if (out_n == 16 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512];
}
break;
@@ -32965,6 +35206,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
else if (out_n == 4 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPD512];
}
break;
@@ -32975,6 +35218,8 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
else if (out_n == 8 && in_n == 8)
return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
+ else if (out_n == 16 && in_n == 16)
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPS512];
}
break;
@@ -33392,24 +35637,48 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
case V4DFmode:
code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
break;
+ case V8DFmode:
+ if (TARGET_AVX512F)
+ code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
+ else
+ return NULL_TREE;
+ break;
case V2DImode:
code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
break;
case V4DImode:
code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
break;
+ case V8DImode:
+ if (TARGET_AVX512F)
+ code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
+ else
+ return NULL_TREE;
+ break;
case V4SFmode:
code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
break;
case V8SFmode:
code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
break;
+ case V16SFmode:
+ if (TARGET_AVX512F)
+ code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
+ else
+ return NULL_TREE;
+ break;
case V4SImode:
code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
break;
case V8SImode:
code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
break;
+ case V16SImode:
+ if (TARGET_AVX512F)
+ code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
+ else
+ return NULL_TREE;
+ break;
default:
return NULL_TREE;
}
@@ -33465,7 +35734,7 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
unsigned i, nelt = GET_MODE_NUNITS (mode);
unsigned mask = 0;
- unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
+ unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
if (XVECLEN (par, 0) != (int) nelt)
return 0;
@@ -33488,6 +35757,24 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
switch (mode)
{
+ case V8DFmode:
+ /* In the 512-bit DFmode case, we can only move elements within
+ a 128-bit lane. First fill the second part of the mask,
+ then fallthru. */
+ for (i = 4; i < 6; ++i)
+ {
+ if (ipar[i] < 4 || ipar[i] >= 6)
+ return 0;
+ mask |= (ipar[i] - 4) << i;
+ }
+ for (i = 6; i < 8; ++i)
+ {
+ if (ipar[i] < 6)
+ return 0;
+ mask |= (ipar[i] - 6) << i;
+ }
+ /* FALLTHRU */
+
case V4DFmode:
/* In the 256-bit DFmode case, we can only move elements within
a 128-bit lane. */
@@ -33505,10 +35792,18 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
}
break;
+ case V16SFmode:
+ /* In the 512-bit SFmode case, the permutation in the upper 256 bits
+ must mirror the permutation in the lower 256 bits. */
+ for (i = 0; i < 8; ++i)
+ if (ipar[i] + 8 != ipar[i + 8])
+ return 0;
+ /* FALLTHRU */
+
case V8SFmode:
- /* In the 256-bit SFmode case, we have full freedom of movement
- within the low 128-bit lane, but the high 128-bit lane must
- mirror the exact same pattern. */
+ /* In the 256-bit SFmode case, we have full freedom of
+ movement within the low 128-bit lane, but the high 128-bit
+ lane must mirror the exact same pattern. */
for (i = 0; i < 4; ++i)
if (ipar[i] + 4 != ipar[i + 4])
return 0;
@@ -33729,10 +36024,12 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass)
return regclass;
/* Force constants into memory if we are loading a (nonzero) constant into
- an MMX or SSE register. This is because there are no MMX/SSE instructions
- to load from a constant. */
+ an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
+ instructions to load from a constant. */
if (CONSTANT_P (x)
- && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
+ && (MAYBE_MMX_CLASS_P (regclass)
+ || MAYBE_SSE_CLASS_P (regclass)
+ || MAYBE_MASK_CLASS_P (regclass)))
return NO_REGS;
/* Prefer SSE regs only, if we can use them for math. */
@@ -33798,7 +36095,7 @@ ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
alternative: if reload cannot do this, it will still use its choice. */
mode = GET_MODE (x);
if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
- return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
+ return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
if (X87_FLOAT_MODE_P (mode))
{
@@ -34035,6 +36332,9 @@ ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
if (MAYBE_FLOAT_CLASS_P (regclass))
return true;
+ if (MAYBE_MASK_CLASS_P (regclass))
+ return true;
+
if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
{
/* Vector registers do not support QI or HImode loads. If we don't
@@ -34265,18 +36565,26 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
return false;
if (STACK_REGNO_P (regno))
return VALID_FP_MODE_P (mode);
+ if (MASK_REGNO_P (regno))
+ return VALID_MASK_REG_MODE (mode);
if (SSE_REGNO_P (regno))
{
/* We implement the move patterns for all vector modes into and
out of SSE registers, even when no operation instructions
- are available. OImode move is available only when AVX is
- enabled. */
- return ((TARGET_AVX && mode == OImode)
- || VALID_AVX256_REG_MODE (mode)
- || VALID_SSE_REG_MODE (mode)
- || VALID_SSE2_REG_MODE (mode)
- || VALID_MMX_REG_MODE (mode)
- || VALID_MMX_REG_MODE_3DNOW (mode));
+ are available. In xmm16-xmm31 we can store only 512-bit
+ modes. OImode move is available only when AVX is enabled.
+ XImode is available only when AVX512F is enabled. */
+ return ((TARGET_AVX512F
+ && (mode == XImode
+ || VALID_AVX512F_REG_MODE (mode)
+ || VALID_AVX512F_SCALAR_MODE (mode)))
+ || (!EXT_REX_SSE_REGNO_P (regno)
+ && ((TARGET_AVX && mode == OImode)
+ || VALID_AVX256_REG_MODE (mode)
+ || VALID_SSE_REG_MODE (mode)
+ || VALID_SSE2_REG_MODE (mode)
+ || VALID_MMX_REG_MODE (mode)
+ || VALID_MMX_REG_MODE_3DNOW (mode))));
}
if (MMX_REGNO_P (regno))
{
@@ -34422,7 +36730,8 @@ ix86_set_reg_reg_cost (enum machine_mode mode)
case MODE_VECTOR_INT:
case MODE_VECTOR_FLOAT:
- if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
+ if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
+ || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
|| (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
|| (TARGET_SSE && VALID_SSE_REG_MODE (mode))
|| (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
@@ -34442,6 +36751,7 @@ static bool
ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
bool speed)
{
+ rtx mask;
enum rtx_code code = (enum rtx_code) code_i;
enum rtx_code outer_code = (enum rtx_code) outer_code_i;
enum machine_mode mode = GET_MODE (x);
@@ -34625,7 +36935,7 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
rtx sub;
gcc_assert (FLOAT_MODE_P (mode));
- gcc_assert (TARGET_FMA || TARGET_FMA4);
+ gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
/* ??? SSE scalar/vector cost should be used here. */
/* ??? Bald assumption that fma has the same cost as fmul. */
@@ -34918,13 +37228,21 @@ ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
case VEC_SELECT:
case VEC_CONCAT:
- case VEC_MERGE:
case VEC_DUPLICATE:
/* ??? Assume all of these vector manipulation patterns are
recognizable. In which case they all pretty much have the
same cost. */
*total = cost->fabs;
return true;
+ case VEC_MERGE:
+ mask = XEXP (x, 2);
+ /* This is a masked instruction; assume the same cost as the
+ non-masked variant. */
+ if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
+ *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
+ else
+ *total = cost->fabs;
+ return true;
default:
return false;
@@ -35054,6 +37372,14 @@ x86_order_regs_for_local_alloc (void)
for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
reg_alloc_order [pos++] = i;
+ /* Extended REX SSE registers. */
+ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* Mask register. */
+ for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
/* x87 registers. */
if (TARGET_SSE_MATH)
for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
@@ -36051,9 +38377,9 @@ x86_emit_floatuns (rtx operands[2])
emit_label (donelab);
}
-/* AVX2 does support 32-byte integer vector operations,
- thus the longest vector we are faced with is V32QImode. */
-#define MAX_VECT_LEN 32
+/* AVX512F does support 64-byte integer vector operations,
+ thus the longest vector we are faced with is V64QImode. */
+#define MAX_VECT_LEN 64
struct expand_vec_perm_d
{
@@ -36082,6 +38408,36 @@ get_mode_wider_vector (enum machine_mode o)
return n;
}
+/* A subroutine of ix86_expand_vector_init_duplicate.  Tries to
+ fill TARGET with VAL via VEC_DUPLICATE. */
+
+static bool
+ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
+{
+ bool ok;
+ rtx insn, dup;
+
+ /* First attempt to recognize VAL as-is. */
+ dup = gen_rtx_VEC_DUPLICATE (mode, val);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
+ if (recog_memoized (insn) < 0)
+ {
+ rtx seq;
+ /* If that fails, force VAL into a register. */
+
+ start_sequence ();
+ XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
+ seq = get_insns ();
+ end_sequence ();
+ if (seq)
+ emit_insn_before (seq, insn);
+
+ ok = recog_memoized (insn) >= 0;
+ gcc_assert (ok);
+ }
+ return true;
+}
+
/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
with all elements equal to VAR. Return true if successful. */
@@ -36107,29 +38463,11 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
case V2DImode:
case V4SFmode:
case V4SImode:
- {
- rtx insn, dup;
-
- /* First attempt to recognize VAL as-is. */
- dup = gen_rtx_VEC_DUPLICATE (mode, val);
- insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
- if (recog_memoized (insn) < 0)
- {
- rtx seq;
- /* If that fails, force VAL into a register. */
-
- start_sequence ();
- XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
- seq = get_insns ();
- end_sequence ();
- if (seq)
- emit_insn_before (seq, insn);
-
- ok = recog_memoized (insn) >= 0;
- gcc_assert (ok);
- }
- }
- return true;
+ case V16SImode:
+ case V8DImode:
+ case V16SFmode:
+ case V8DFmode:
+ return ix86_vector_duplicate_value (mode, target, val);
case V4HImode:
if (!mmx_ok)
@@ -36477,8 +38815,8 @@ static void
ix86_expand_vector_init_concat (enum machine_mode mode,
rtx target, rtx *ops, int n)
{
- enum machine_mode cmode, hmode = VOIDmode;
- rtx first[8], second[4];
+ enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
+ rtx first[16], second[8], third[4];
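+ /* FIRST, SECOND and THIRD hold the intermediate concatenation results:
+    CMODE parts in FIRST, HMODE parts in SECOND and GMODE parts in THIRD,
+    which are finally concatenated into MODE. */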
rtvec v;
int i, j;
@@ -36487,6 +38825,18 @@ ix86_expand_vector_init_concat (enum machine_mode mode,
case 2:
switch (mode)
{
+ case V16SImode:
+ cmode = V8SImode;
+ break;
+ case V16SFmode:
+ cmode = V8SFmode;
+ break;
+ case V8DImode:
+ cmode = V4DImode;
+ break;
+ case V8DFmode:
+ cmode = V4DFmode;
+ break;
case V8SImode:
cmode = V4SImode;
break;
@@ -36553,6 +38903,14 @@ ix86_expand_vector_init_concat (enum machine_mode mode,
case 8:
switch (mode)
{
+ case V8DImode:
+ cmode = V2DImode;
+ hmode = V4DImode;
+ break;
+ case V8DFmode:
+ cmode = V2DFmode;
+ hmode = V4DFmode;
+ break;
case V8SImode:
cmode = V2SImode;
hmode = V4SImode;
@@ -36566,6 +38924,24 @@ ix86_expand_vector_init_concat (enum machine_mode mode,
}
goto half;
+ case 16:
+ switch (mode)
+ {
+ case V16SImode:
+ cmode = V2SImode;
+ hmode = V4SImode;
+ gmode = V8SImode;
+ break;
+ case V16SFmode:
+ cmode = V2SFmode;
+ hmode = V4SFmode;
+ gmode = V8SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ goto half;
+
half:
/* FIXME: We process inputs backward to help RA. PR 36222. */
i = n - 1;
@@ -36579,7 +38955,27 @@ half:
}
n >>= 1;
- if (n > 2)
+ if (n > 4)
+ {
+ gcc_assert (hmode != VOIDmode);
+ gcc_assert (gmode != VOIDmode);
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ second[j] = gen_reg_rtx (hmode);
+ ix86_expand_vector_init_concat (hmode, second [j],
+ &first [i], 2);
+ }
+ n >>= 1;
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ third[j] = gen_reg_rtx (gmode);
+ ix86_expand_vector_init_concat (gmode, third[j],
+ &second[i], 2);
+ }
+ n >>= 1;
+ ix86_expand_vector_init_concat (mode, target, third, n);
+ }
+ else if (n > 2)
{
gcc_assert (hmode != VOIDmode);
for (i = j = 0; i < n; i += 2, j++)
@@ -36722,7 +39118,7 @@ static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
rtx target, rtx vals)
{
- rtx ops[32], op0, op1;
+ rtx ops[64], op0, op1;
enum machine_mode half_mode = VOIDmode;
int n, i;
@@ -36734,6 +39130,10 @@ ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
break;
/* FALLTHRU */
+ case V16SImode:
+ case V16SFmode:
+ case V8DFmode:
+ case V8DImode:
case V8SFmode:
case V8SImode:
case V4DFmode:
@@ -37358,6 +39758,42 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
}
break;
+ case V16SFmode:
+ tmp = gen_reg_rtx (V8SFmode);
+ if (elt < 8)
+ emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 7);
+ return;
+
+ case V8DFmode:
+ tmp = gen_reg_rtx (V4DFmode);
+ if (elt < 4)
+ emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 3);
+ return;
+
+ case V16SImode:
+ tmp = gen_reg_rtx (V8SImode);
+ if (elt < 8)
+ emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 7);
+ return;
+
+ case V8DImode:
+ tmp = gen_reg_rtx (V4DImode);
+ if (elt < 4)
+ emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 3);
+ return;
+
case V8QImode:
/* ??? Could extract the appropriate HImode element and shift. */
default:
@@ -37445,6 +39881,44 @@ emit_reduc_half (rtx dest, rtx src, int i)
gen_lowpart (V2TImode, src),
GEN_INT (i / 2));
break;
+ case V16SImode:
+ case V16SFmode:
+ case V8DImode:
+ case V8DFmode:
+ if (i > 128)
+ tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
+ gen_lowpart (V16SImode, src),
+ gen_lowpart (V16SImode, src),
+ GEN_INT (0x4 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x5 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x6 + (i == 512 ? 4 : 0)),
+ GEN_INT (0x7 + (i == 512 ? 4 : 0)),
+ GEN_INT (0xC), GEN_INT (0xD),
+ GEN_INT (0xE), GEN_INT (0xF),
+ GEN_INT (0x10), GEN_INT (0x11),
+ GEN_INT (0x12), GEN_INT (0x13),
+ GEN_INT (0x14), GEN_INT (0x15),
+ GEN_INT (0x16), GEN_INT (0x17));
+ else
+ tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
+ gen_lowpart (V16SImode, src),
+ GEN_INT (i == 128 ? 0x2 : 0x1),
+ GEN_INT (0x3),
+ GEN_INT (0x3),
+ GEN_INT (0x3),
+ GEN_INT (i == 128 ? 0x6 : 0x5),
+ GEN_INT (0x7),
+ GEN_INT (0x7),
+ GEN_INT (0x7),
+ GEN_INT (i == 128 ? 0xA : 0x9),
+ GEN_INT (0xB),
+ GEN_INT (0xB),
+ GEN_INT (0xB),
+ GEN_INT (i == 128 ? 0xE : 0xD),
+ GEN_INT (0xF),
+ GEN_INT (0xF),
+ GEN_INT (0xF));
+ break;
default:
gcc_unreachable ();
}
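For the 512-bit modes the reduction works like the narrower cases: each step shuffles the upper half of the remaining data down so an element-wise operation can fold it onto the lower half, using shuf_i32x4 for the 512/256-bit steps and pshufd below that. A plain-C stand-in for that halving reduction (illustrative only, summing 16 ints):

/* Each step folds the upper half of the data onto the lower half, so a
   16-element sum needs log2(16) = 4 combine steps.  */
#include <stdio.h>

int
main (void)
{
  int v[16], i, half;

  for (i = 0; i < 16; i++)
    v[i] = i + 1;                        /* sum of 1..16 is 136 */

  for (half = 8; half >= 1; half >>= 1)  /* 256-, 128-, 64-, 32-bit halves */
    for (i = 0; i < half; i++)
      v[i] += v[i + half];               /* fold upper half onto lower */

  printf ("%d\n", v[0]);                 /* prints 136 */
  return 0;
}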
@@ -37507,6 +39981,8 @@ ix86_vector_mode_supported_p (enum machine_mode mode)
return true;
if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
return true;
+ if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
+ return true;
if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
return true;
if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
@@ -37820,9 +40296,15 @@ void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
b = force_reg (mode, b);
/* x0 = rcp(b) estimate */
- emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
- UNSPEC_RCP)));
+ if (mode == V16SFmode || mode == V8DFmode)
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+ UNSPEC_RCP14)));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+ UNSPEC_RCP)));
+
/* e0 = x0 * b */
emit_insn (gen_rtx_SET (VOIDmode, e0,
gen_rtx_MULT (mode, x0, b)));
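The only change for the 512-bit modes is the seed: rcp14 rather than rcp supplies the initial estimate, and the same Newton-Raphson refinement follows. A small numeric sketch of that refinement, x1 = x0 * (2 - b*x0), with made-up values (not the expander's actual insn sequence):

/* From a rough estimate x0 ~ 1/b, one Newton-Raphson step roughly doubles
   the number of correct bits, which is why the low-precision rcp/rcp14
   estimate is an adequate seed.  */
#include <stdio.h>

int
main (void)
{
  float a = 3.0f, b = 7.0f;
  float x0 = 0.14f;                       /* crude rcp(b) estimate */
  float x1 = x0 * (2.0f - b * x0);        /* one refinement step */

  printf ("a/b ~ %f (exact %f)\n", (double) (a * x1), (double) (a / b));
  return 0;
}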
@@ -37852,6 +40334,7 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
{
rtx x0, e0, e1, e2, e3, mthree, mhalf;
REAL_VALUE_TYPE r;
+ int unspec;
x0 = gen_reg_rtx (mode);
e0 = gen_reg_rtx (mode);
@@ -37864,11 +40347,15 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
+ unspec = UNSPEC_RSQRT;
if (VECTOR_MODE_P (mode))
{
mthree = ix86_build_const_vector (mode, true, mthree);
mhalf = ix86_build_const_vector (mode, true, mhalf);
+ /* There is no 512-bit rsqrt. There is, however, rsqrt14. */
+ if (GET_MODE_SIZE (mode) == 64)
+ unspec = UNSPEC_RSQRT14;
}
/* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
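The identity in the comment above is the standard Newton-Raphson step for rsqrt, rearranged so the final multiply by a yields sqrt(a) directly. A quick numeric check with illustrative values:

/* With x0 ~ rsqrt(a), x1 = -0.5 * x0 * (a*x0*x0 - 3) is the refined rsqrt,
   and a * x1 approximates sqrt(a).  */
#include <stdio.h>

int
main (void)
{
  float a = 2.0f;
  float x0 = 0.7f;                                  /* rough rsqrt(2) */
  float x1 = -0.5f * x0 * (a * x0 * x0 - 3.0f);     /* refined rsqrt  */

  printf ("sqrt(a) ~ %f (exact ~1.414214)\n", (double) (a * x1));
  return 0;
}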
@@ -37879,7 +40366,7 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
/* x0 = rsqrt(a) estimate */
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
- UNSPEC_RSQRT)));
+ unspec)));
/* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
if (!recip)
@@ -37890,11 +40377,23 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
mask = gen_reg_rtx (mode);
zero = force_reg (mode, CONST0_RTX(mode));
- emit_insn (gen_rtx_SET (VOIDmode, mask,
- gen_rtx_NE (mode, zero, a)));
- emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_AND (mode, x0, mask)));
+ /* Handle masked compare. */
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
+ {
+ mask = gen_reg_rtx (HImode);
+ /* Imm value 0x4 corresponds to not-equal comparison. */
+ emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
+ emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
+ }
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_NE (mode, zero, a)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_AND (mode, x0, mask)));
+ }
}
/* e0 = x0 * a */
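On 512-bit vectors the zero filter cannot AND with a full-width compare result; it goes through a k-register compare and a blend instead. A hedged intrinsics-level sketch of the same idea (assumes -mavx512f; this mirrors the emitted pattern, it is not the expander's code):

#include <immintrin.h>

__m512
filter_zero_lanes (__m512 a, __m512 x0)
{
  __m512 zero = _mm512_setzero_ps ();
  /* Imm 0x4 (_CMP_NEQ_UQ): set mask bits where a != 0.0.  */
  __mmask16 k = _mm512_cmp_ps_mask (zero, a, 0x4);
  /* Keep the rsqrt estimate only in those lanes; force 0.0 elsewhere.  */
  return _mm512_mask_blend_ps (k, zero, x0);
}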
@@ -40928,6 +43427,14 @@ ix86_expand_vec_perm_const (rtx operands[4])
gcc_assert (XVECLEN (sel, 0) == nelt);
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
+ /* vpermi2 takes a vector, so there is no need to disassemble it. */
+ if (d.vmode == V16SFmode || d.vmode == V16SImode
+ || d.vmode == V8DFmode || d.vmode == V8DImode)
+ {
+ ix86_expand_vec_perm_vpermi2 (d.target, d.op0, sel, d.op1);
+ return true;
+ }
+
for (i = 0; i < nelt; ++i)
{
rtx e = XVECEXP (sel, 0, i);
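The early-out works because vpermi2 indexes across both source operands, so any constant two-operand permutation of a 512-bit vector is a single instruction and never needs to be decomposed into lane shuffles. An assumed intrinsics illustration (compile with -mavx512f):

/* vpermi2ps selects every result lane from either source by a 5-bit index;
   this example interleaves the low halves of two vectors.  */
#include <immintrin.h>

__m512
interleave_low_halves (__m512 a, __m512 b)
{
  /* Indices 0..15 select from A, 16..31 select from B.  */
  const __m512i idx = _mm512_setr_epi32 (0, 16, 1, 17, 2, 18, 3, 19,
                                         4, 20, 5, 21, 6, 22, 7, 23);
  return _mm512_permutex2var_ps (a, idx, b);
}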
@@ -40971,6 +43478,10 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
/* Given sufficient ISA support we can just return true here
for selected vector modes. */
+ if (d.vmode == V16SImode || d.vmode == V16SFmode
+ || d.vmode == V8DFmode || d.vmode == V8DImode)
+ /* All implementable with a single vpermi2 insn. */
+ return true;
if (GET_MODE_SIZE (d.vmode) == 16)
{
/* All implementable with a single vpperm insn. */
@@ -41038,6 +43549,19 @@ ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
d.perm[i] = i * 2 + odd;
/* We'll either be able to implement the permutation directly... */
+ if (d.vmode == V16SFmode || d.vmode == V16SImode
+ || d.vmode == V8DImode || d.vmode == V8DFmode)
+ {
+ rtx vec[64];
+ for (i = 0; i < nelt; ++i)
+ vec[i] = GEN_INT (d.perm[i]);
+ rtx mask = gen_rtx_CONST_VECTOR (d.vmode, gen_rtvec_v (nelt, vec));
+ rtx operands[4] = {targ, op0, op1, mask};
+ if (ix86_expand_vec_perm_const (operands))
+ return;
+ else
+ gcc_unreachable ();
+ }
if (expand_vec_perm_1 (&d))
return;
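For the 512-bit modes the even/odd extraction simply materializes the index pattern i*2 + odd as a constant vector and lets the vpermi2 path above handle it. A hypothetical intrinsics sketch of the even-lane case (odd == 0) for V16SImode, again needing -mavx512f:

#include <immintrin.h>

__m512i
extract_even_lanes (__m512i op0, __m512i op1)
{
  /* Index i*2 for i = 0..15 pulls the even lanes of both operands.  */
  const __m512i idx = _mm512_setr_epi32 (0, 2, 4, 6, 8, 10, 12, 14,
                                         16, 18, 20, 22, 24, 26, 28, 30);
  return _mm512_permutex2var_epi32 (op0, idx, op1);
}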
@@ -41213,7 +43737,7 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
op2 = force_reg (mode, op2);
/* We only play even/odd games with vectors of SImode. */
- gcc_assert (mode == V4SImode || mode == V8SImode);
+ gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
/* If we're looking for the odd results, shift those members down to
the even slots. For some cpus this is faster than a PSHUFD. */
@@ -41239,7 +43763,14 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
op2 = gen_lowpart (mode, op2);
}
- if (mode == V8SImode)
+ if (mode == V16SImode)
+ {
+ if (uns_p)
+ x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
+ else
+ x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
+ }
+ else if (mode == V8SImode)
{
if (uns_p)
x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
@@ -41455,6 +43986,11 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
umul = gen_vec_widen_umult_even_v8si;
nmode = V8SImode;
}
+ else if (mode == V8DImode)
+ {
+ umul = gen_vec_widen_umult_even_v16si;
+ nmode = V16SImode;
+ }
else
gcc_unreachable ();
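vec_widen_umult_even_v16si follows the same convention as the narrower patterns: only the even 32-bit lanes are multiplied, each producing a full 64-bit result, and ix86_expand_sse2_mulvxdi3 composes the V8DImode multiply from such partial products. A scalar model of one even widening multiply, with illustrative values:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint32_t a[4] = { 0xffffffffu, 1, 3, 5 };
  uint32_t b[4] = { 2, 7, 4, 9 };
  uint64_t r[2];
  int i;

  for (i = 0; i < 2; i++)        /* even lanes only: 0 and 2 */
    r[i] = (uint64_t) a[2 * i] * b[2 * i];

  printf ("%llu %llu\n", (unsigned long long) r[0],
          (unsigned long long) r[1]);   /* prints 8589934590 12 */
  return 0;
}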
@@ -42546,12 +45082,16 @@ ix86_preferred_simd_mode (enum machine_mode mode)
case HImode:
return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
case SImode:
- return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
+ return TARGET_AVX512F ? V16SImode :
+ (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
case DImode:
- return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
+ return TARGET_AVX512F ? V8DImode :
+ (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
case SFmode:
- if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ if (TARGET_AVX512F)
+ return V16SFmode;
+ else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SFmode;
else
return V4SFmode;
@@ -42559,6 +45099,8 @@ ix86_preferred_simd_mode (enum machine_mode mode)
case DFmode:
if (!TARGET_VECTORIZE_DOUBLE)
return word_mode;
+ else if (TARGET_AVX512F)
+ return V8DFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DFmode;
else if (TARGET_SSE2)
@@ -42571,12 +45113,14 @@ ix86_preferred_simd_mode (enum machine_mode mode)
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
- vectors. */
+ vectors. If AVX512F is enabled then try vectorizing with 512bit,
+ 256bit and 128bit vectors. */
static unsigned int
ix86_autovectorize_vector_sizes (void)
{
- return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
+ return TARGET_AVX512F ? 64 | 32 | 16 :
+ (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
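The hook reports candidate widths as an OR of byte sizes, so enabling AVX-512F simply adds the 64-byte entry on top of the existing 32 and 16. A rough model of how that bitmask enumerates the widths the vectorizer may try (the actual retry policy lives in the middle end):

#include <stdio.h>

int
main (void)
{
  unsigned sizes = 64 | 32 | 16;   /* value returned when AVX-512F is on */
  unsigned w;

  for (w = 64; w >= 16; w >>= 1)
    if (sizes & w)
      printf ("candidate: %u-byte (%u-bit) vectors\n", w, w * 8);
  return 0;
}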
@@ -42590,7 +45134,7 @@ ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
&& (mode == SImode || (TARGET_64BIT && mode == DImode))
&& INTEGER_CLASS_P (rclass))
- return SSE_REGS;
+ return ALL_SSE_REGS;
return NO_REGS;
}