diff options
Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r-- | gcc/config/i386/i386.c | 116 |
1 files changed, 96 insertions, 20 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 63eb1f2a09b..ff87333b07d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -18802,12 +18802,29 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) of the register, once we have that information we may be able to handle some of them more efficiently. */ if (can_create_pseudo_p () - && register_operand (op0, mode) && (CONSTANT_P (op1) || (SUBREG_P (op1) && CONSTANT_P (SUBREG_REG (op1)))) - && !standard_sse_constant_p (op1)) - op1 = validize_mem (force_const_mem (mode, op1)); + && ((register_operand (op0, mode) + && !standard_sse_constant_p (op1)) + /* ix86_expand_vector_move_misalign() does not like constants. */ + || (SSE_REG_MODE_P (mode) + && MEM_P (op0) + && MEM_ALIGN (op0) < align))) + { + if (SUBREG_P (op1)) + { + machine_mode imode = GET_MODE (SUBREG_REG (op1)); + rtx r = force_const_mem (imode, SUBREG_REG (op1)); + if (r) + r = validize_mem (r); + else + r = force_reg (imode, SUBREG_REG (op1)); + op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1)); + } + else + op1 = validize_mem (force_const_mem (mode, op1)); + } /* We need to check memory alignment for SSE mode since attribute can make operands unaligned. */ @@ -18818,13 +18835,8 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) { rtx tmp[2]; - /* ix86_expand_vector_move_misalign() does not like constants ... */ - if (CONSTANT_P (op1) - || (SUBREG_P (op1) - && CONSTANT_P (SUBREG_REG (op1)))) - op1 = validize_mem (force_const_mem (mode, op1)); - - /* ... nor both arguments in memory. */ + /* ix86_expand_vector_move_misalign() does not like both + arguments in memory. */ if (!register_operand (op0, mode) && !register_operand (op1, mode)) op1 = force_reg (mode, op1); @@ -18910,7 +18922,7 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) m = adjust_address (op0, mode, 0); emit_insn (extract (m, op1, const0_rtx)); m = adjust_address (op0, mode, 16); - emit_insn (extract (m, op1, const1_rtx)); + emit_insn (extract (m, copy_rtx (op1), const1_rtx)); } else emit_insn (store_unaligned (op0, op1)); @@ -19218,7 +19230,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) m = adjust_address (op0, V2SFmode, 0); emit_insn (gen_sse_storelps (m, op1)); m = adjust_address (op0, V2SFmode, 8); - emit_insn (gen_sse_storehps (m, op1)); + emit_insn (gen_sse_storehps (m, copy_rtx (op1))); } } } @@ -23042,17 +23054,33 @@ ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code) switch (code) { case EQ: - return 0x08; + return 0x00; case NE: return 0x04; case GT: - return 0x16; + return 0x0e; case LE: - return 0x1a; + return 0x02; case GE: - return 0x15; + return 0x0d; case LT: - return 0x19; + return 0x01; + case UNLE: + return 0x0a; + case UNLT: + return 0x09; + case UNGE: + return 0x05; + case UNGT: + return 0x06; + case UNEQ: + return 0x18; + case LTGT: + return 0x0c; + case ORDERED: + return 0x07; + case UNORDERED: + return 0x03; default: gcc_unreachable (); } @@ -30701,7 +30729,7 @@ enum ix86_builtins IX86_BUILTIN_CVTPD2PS512, IX86_BUILTIN_CVTPD2UDQ512, IX86_BUILTIN_CVTPH2PS512, - IX86_BUILTIN_CVTPS2DQ512, + IX86_BUILTIN_CVTPS2DQ512_MASK, IX86_BUILTIN_CVTPS2PD512, IX86_BUILTIN_CVTPS2PH512, IX86_BUILTIN_CVTPS2UDQ512, @@ -32141,14 +32169,25 @@ enum ix86_builtins IX86_BUILTIN_COPYSIGNQ, /* Vectorizer support builtins. */ - IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, IX86_BUILTIN_CPYSGNPS, IX86_BUILTIN_CPYSGNPD, IX86_BUILTIN_CPYSGNPS256, IX86_BUILTIN_CPYSGNPS512, IX86_BUILTIN_CPYSGNPD256, IX86_BUILTIN_CPYSGNPD512, + IX86_BUILTIN_FLOORPS512, + IX86_BUILTIN_FLOORPD512, + IX86_BUILTIN_CEILPS512, + IX86_BUILTIN_CEILPD512, + IX86_BUILTIN_TRUNCPS512, + IX86_BUILTIN_TRUNCPD512, + IX86_BUILTIN_CVTPS2DQ512, + IX86_BUILTIN_VEC_PACK_SFIX512, + IX86_BUILTIN_FLOORPS_SFIX512, IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, + IX86_BUILTIN_CEILPS_SFIX512, + IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, + IX86_BUILTIN_ROUNDPS_AZ_SFIX512, IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, @@ -33963,6 +34002,17 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF }, { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND }, @@ -34879,7 +34929,7 @@ static const struct builtin_description bdesc_round_args[] = { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT }, - { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, + { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT }, { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT }, @@ -38456,10 +38506,13 @@ ix86_expand_args_builtin (const struct builtin_description *d, { case V2DF_FTYPE_V2DF_ROUND: case V4DF_FTYPE_V4DF_ROUND: + case V8DF_FTYPE_V8DF_ROUND: case V4SF_FTYPE_V4SF_ROUND: case V8SF_FTYPE_V8SF_ROUND: + case V16SF_FTYPE_V16SF_ROUND: case V4SI_FTYPE_V4SF_ROUND: case V8SI_FTYPE_V8SF_ROUND: + case V16SI_FTYPE_V16SF_ROUND: return ix86_expand_sse_round (d, exp, target); case V4SI_FTYPE_V2DF_V2DF_ROUND: case V8SI_FTYPE_V4DF_V4DF_ROUND: @@ -38573,6 +38626,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16SI_FTYPE_V8SI: case V16SF_FTYPE_V4SF: case V16SI_FTYPE_V4SI: + case V16SI_FTYPE_V16SF: case V16SF_FTYPE_V16SF: case V8DI_FTYPE_UQI: case V8DF_FTYPE_V4DF: @@ -42278,6 +42332,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512); } break; @@ -42303,6 +42359,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512); } break; @@ -42315,6 +42373,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX); else if (out_n == 8 && in_n == 4) return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256); + else if (out_n == 16 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512); } if (out_mode == SImode && in_mode == SFmode) { @@ -42322,6 +42382,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512); } break; @@ -42347,6 +42409,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512); } break; @@ -42361,6 +42425,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_FLOORPD); else if (out_n == 4 && in_n == 4) return ix86_get_builtin (IX86_BUILTIN_FLOORPD256); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_FLOORPD512); } if (out_mode == SFmode && in_mode == SFmode) { @@ -42368,6 +42434,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_FLOORPS); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_FLOORPS256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_FLOORPS512); } break; @@ -42382,6 +42450,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_CEILPD); else if (out_n == 4 && in_n == 4) return ix86_get_builtin (IX86_BUILTIN_CEILPD256); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_CEILPD512); } if (out_mode == SFmode && in_mode == SFmode) { @@ -42389,6 +42459,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_CEILPS); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_CEILPS256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_CEILPS512); } break; @@ -42403,6 +42475,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_TRUNCPD); else if (out_n == 4 && in_n == 4) return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256); + else if (out_n == 8 && in_n == 8) + return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512); } if (out_mode == SFmode && in_mode == SFmode) { @@ -42410,6 +42484,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_get_builtin (IX86_BUILTIN_TRUNCPS); else if (out_n == 8 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256); + else if (out_n == 16 && in_n == 16) + return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512); } break; |