aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/i386.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r--gcc/config/i386/i386.c116
1 files changed, 96 insertions, 20 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 63eb1f2a09b..ff87333b07d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -18802,12 +18802,29 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
of the register, once we have that information we may be able
to handle some of them more efficiently. */
if (can_create_pseudo_p ()
- && register_operand (op0, mode)
&& (CONSTANT_P (op1)
|| (SUBREG_P (op1)
&& CONSTANT_P (SUBREG_REG (op1))))
- && !standard_sse_constant_p (op1))
- op1 = validize_mem (force_const_mem (mode, op1));
+ && ((register_operand (op0, mode)
+ && !standard_sse_constant_p (op1))
+ /* ix86_expand_vector_move_misalign() does not like constants. */
+ || (SSE_REG_MODE_P (mode)
+ && MEM_P (op0)
+ && MEM_ALIGN (op0) < align)))
+ {
+ if (SUBREG_P (op1))
+ {
+ machine_mode imode = GET_MODE (SUBREG_REG (op1));
+ rtx r = force_const_mem (imode, SUBREG_REG (op1));
+ if (r)
+ r = validize_mem (r);
+ else
+ r = force_reg (imode, SUBREG_REG (op1));
+ op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1));
+ }
+ else
+ op1 = validize_mem (force_const_mem (mode, op1));
+ }
/* We need to check memory alignment for SSE mode since attribute
can make operands unaligned. */
@@ -18818,13 +18835,8 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
{
rtx tmp[2];
- /* ix86_expand_vector_move_misalign() does not like constants ... */
- if (CONSTANT_P (op1)
- || (SUBREG_P (op1)
- && CONSTANT_P (SUBREG_REG (op1))))
- op1 = validize_mem (force_const_mem (mode, op1));
-
- /* ... nor both arguments in memory. */
+ /* ix86_expand_vector_move_misalign() does not like both
+ arguments in memory. */
if (!register_operand (op0, mode)
&& !register_operand (op1, mode))
op1 = force_reg (mode, op1);
@@ -18910,7 +18922,7 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
m = adjust_address (op0, mode, 0);
emit_insn (extract (m, op1, const0_rtx));
m = adjust_address (op0, mode, 16);
- emit_insn (extract (m, op1, const1_rtx));
+ emit_insn (extract (m, copy_rtx (op1), const1_rtx));
}
else
emit_insn (store_unaligned (op0, op1));
@@ -19218,7 +19230,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
m = adjust_address (op0, V2SFmode, 0);
emit_insn (gen_sse_storelps (m, op1));
m = adjust_address (op0, V2SFmode, 8);
- emit_insn (gen_sse_storehps (m, op1));
+ emit_insn (gen_sse_storehps (m, copy_rtx (op1)));
}
}
}
@@ -23042,17 +23054,33 @@ ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
switch (code)
{
case EQ:
- return 0x08;
+ return 0x00;
case NE:
return 0x04;
case GT:
- return 0x16;
+ return 0x0e;
case LE:
- return 0x1a;
+ return 0x02;
case GE:
- return 0x15;
+ return 0x0d;
case LT:
- return 0x19;
+ return 0x01;
+ case UNLE:
+ return 0x0a;
+ case UNLT:
+ return 0x09;
+ case UNGE:
+ return 0x05;
+ case UNGT:
+ return 0x06;
+ case UNEQ:
+ return 0x18;
+ case LTGT:
+ return 0x0c;
+ case ORDERED:
+ return 0x07;
+ case UNORDERED:
+ return 0x03;
default:
gcc_unreachable ();
}
@@ -30701,7 +30729,7 @@ enum ix86_builtins
IX86_BUILTIN_CVTPD2PS512,
IX86_BUILTIN_CVTPD2UDQ512,
IX86_BUILTIN_CVTPH2PS512,
- IX86_BUILTIN_CVTPS2DQ512,
+ IX86_BUILTIN_CVTPS2DQ512_MASK,
IX86_BUILTIN_CVTPS2PD512,
IX86_BUILTIN_CVTPS2PH512,
IX86_BUILTIN_CVTPS2UDQ512,
@@ -32141,14 +32169,25 @@ enum ix86_builtins
IX86_BUILTIN_COPYSIGNQ,
/* Vectorizer support builtins. */
- IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
IX86_BUILTIN_CPYSGNPS,
IX86_BUILTIN_CPYSGNPD,
IX86_BUILTIN_CPYSGNPS256,
IX86_BUILTIN_CPYSGNPS512,
IX86_BUILTIN_CPYSGNPD256,
IX86_BUILTIN_CPYSGNPD512,
+ IX86_BUILTIN_FLOORPS512,
+ IX86_BUILTIN_FLOORPD512,
+ IX86_BUILTIN_CEILPS512,
+ IX86_BUILTIN_CEILPD512,
+ IX86_BUILTIN_TRUNCPS512,
+ IX86_BUILTIN_TRUNCPD512,
+ IX86_BUILTIN_CVTPS2DQ512,
+ IX86_BUILTIN_VEC_PACK_SFIX512,
+ IX86_BUILTIN_FLOORPS_SFIX512,
IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
+ IX86_BUILTIN_CEILPS_SFIX512,
+ IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
+ IX86_BUILTIN_ROUNDPS_AZ_SFIX512,
IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
@@ -33963,6 +34002,17 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
@@ -34879,7 +34929,7 @@ static const struct builtin_description bdesc_round_args[] =
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
- { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+ { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
{ OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
@@ -38456,10 +38506,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
{
case V2DF_FTYPE_V2DF_ROUND:
case V4DF_FTYPE_V4DF_ROUND:
+ case V8DF_FTYPE_V8DF_ROUND:
case V4SF_FTYPE_V4SF_ROUND:
case V8SF_FTYPE_V8SF_ROUND:
+ case V16SF_FTYPE_V16SF_ROUND:
case V4SI_FTYPE_V4SF_ROUND:
case V8SI_FTYPE_V8SF_ROUND:
+ case V16SI_FTYPE_V16SF_ROUND:
return ix86_expand_sse_round (d, exp, target);
case V4SI_FTYPE_V2DF_V2DF_ROUND:
case V8SI_FTYPE_V4DF_V4DF_ROUND:
@@ -38573,6 +38626,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16SI_FTYPE_V8SI:
case V16SF_FTYPE_V4SF:
case V16SI_FTYPE_V4SI:
+ case V16SI_FTYPE_V16SF:
case V16SF_FTYPE_V16SF:
case V8DI_FTYPE_UQI:
case V8DF_FTYPE_V4DF:
@@ -42278,6 +42332,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
else if (out_n == 8 && in_n == 8)
return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
+ else if (out_n == 16 && in_n == 16)
+ return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
}
break;
@@ -42303,6 +42359,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
else if (out_n == 8 && in_n == 8)
return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
+ else if (out_n == 16 && in_n == 16)
+ return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
}
break;
@@ -42315,6 +42373,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
else if (out_n == 8 && in_n == 4)
return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
+ else if (out_n == 16 && in_n == 8)
+ return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
}
if (out_mode == SImode && in_mode == SFmode)
{
@@ -42322,6 +42382,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
else if (out_n == 8 && in_n == 8)
return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
+ else if (out_n == 16 && in_n == 16)
+ return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
}
break;
@@ -42347,6 +42409,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
else if (out_n == 8 && in_n == 8)
return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
+ else if (out_n == 16 && in_n == 16)
+ return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
}
break;
@@ -42361,6 +42425,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
else if (out_n == 4 && in_n == 4)
return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
+ else if (out_n == 8 && in_n == 8)
+ return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
}
if (out_mode == SFmode && in_mode == SFmode)
{
@@ -42368,6 +42434,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
else if (out_n == 8 && in_n == 8)
return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
+ else if (out_n == 16 && in_n == 16)
+ return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
}
break;
@@ -42382,6 +42450,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_CEILPD);
else if (out_n == 4 && in_n == 4)
return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
+ else if (out_n == 8 && in_n == 8)
+ return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
}
if (out_mode == SFmode && in_mode == SFmode)
{
@@ -42389,6 +42459,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_CEILPS);
else if (out_n == 8 && in_n == 8)
return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
+ else if (out_n == 16 && in_n == 16)
+ return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
}
break;
@@ -42403,6 +42475,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
else if (out_n == 4 && in_n == 4)
return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
+ else if (out_n == 8 && in_n == 8)
+ return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
}
if (out_mode == SFmode && in_mode == SFmode)
{
@@ -42410,6 +42484,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
else if (out_n == 8 && in_n == 8)
return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
+ else if (out_n == 16 && in_n == 16)
+ return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
}
break;