diff options
Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r-- | gcc/config/i386/i386.c | 231 |
1 files changed, 160 insertions, 71 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 23a42734007..4247af353ac 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -42676,66 +42676,36 @@ ix86_vectorize_builtin_scatter (const_tree vectype, return ix86_builtins[code]; } +/* Return true if it is safe to use the rsqrt optabs to optimize + 1.0/sqrt. */ + +static bool +use_rsqrt_p () +{ + return (TARGET_SSE_MATH + && flag_finite_math_only + && !flag_trapping_math + && flag_unsafe_math_optimizations); +} + /* Returns a code for a target-specific builtin that implements reciprocal of the function, or NULL_TREE if not available. */ static tree -ix86_builtin_reciprocal (gcall *call) +ix86_builtin_reciprocal (tree fndecl) { - if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p () - && flag_finite_math_only && !flag_trapping_math - && flag_unsafe_math_optimizations)) - return NULL_TREE; - - if (gimple_call_internal_p (call)) - switch (gimple_call_internal_fn (call)) - { - tree type; - case IFN_SQRT: - type = TREE_TYPE (gimple_call_lhs (call)); - switch (TYPE_MODE (type)) - { - /* Vectorized version of sqrt to rsqrt conversion. */ - case V4SFmode: - return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR); - - case V8SFmode: - return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256); - - default: - return NULL_TREE; - } - - default: - return NULL_TREE; - } - - tree fndecl = gimple_call_fndecl (call); - if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) - /* Machine dependent builtins. */ - switch (DECL_FUNCTION_CODE (fndecl)) - { - /* Vectorized version of sqrt to rsqrt conversion. */ - case IX86_BUILTIN_SQRTPS_NR: - return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR); - - case IX86_BUILTIN_SQRTPS_NR256: - return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256); + switch (DECL_FUNCTION_CODE (fndecl)) + { + /* Vectorized version of sqrt to rsqrt conversion. */ + case IX86_BUILTIN_SQRTPS_NR: + return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR); - default: - return NULL_TREE; - } - else - /* Normal builtins. */ - switch (DECL_FUNCTION_CODE (fndecl)) - { - /* Sqrt to rsqrt conversion. */ - case BUILT_IN_SQRTF: - return ix86_get_builtin (IX86_BUILTIN_RSQRTF); + case IX86_BUILTIN_SQRTPS_NR256: + return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256); - default: - return NULL_TREE; - } + default: + return NULL_TREE; + } } /* Helper for avx_vpermilps256_operand et al. This is also used by @@ -45267,8 +45237,9 @@ ix86_mitigate_rop (void) COPY_HARD_REG_SET (inout_risky, input_risky); IOR_HARD_REG_SET (inout_risky, output_risky); - compute_bb_for_insn (); df_note_add_problem (); + /* Fix up what stack-regs did. */ + df_insn_rescan_all (); df_analyze (); regrename_init (true); @@ -49365,6 +49336,57 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) return true; } +/* For V*[QHS]Imode permutations, check if the same permutation + can't be performed in a 2x, 4x or 8x wider inner mode. */ + +static bool +canonicalize_vector_int_perm (const struct expand_vec_perm_d *d, + struct expand_vec_perm_d *nd) +{ + int i; + enum machine_mode mode = VOIDmode; + + switch (d->vmode) + { + case V16QImode: mode = V8HImode; break; + case V32QImode: mode = V16HImode; break; + case V64QImode: mode = V32HImode; break; + case V8HImode: mode = V4SImode; break; + case V16HImode: mode = V8SImode; break; + case V32HImode: mode = V16SImode; break; + case V4SImode: mode = V2DImode; break; + case V8SImode: mode = V4DImode; break; + case V16SImode: mode = V8DImode; break; + default: return false; + } + for (i = 0; i < d->nelt; i += 2) + if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1) + return false; + nd->vmode = mode; + nd->nelt = d->nelt / 2; + for (i = 0; i < nd->nelt; i++) + nd->perm[i] = d->perm[2 * i] / 2; + if (GET_MODE_INNER (mode) != DImode) + canonicalize_vector_int_perm (nd, nd); + if (nd != d) + { + nd->one_operand_p = d->one_operand_p; + nd->testing_p = d->testing_p; + if (d->op0 == d->op1) + nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0); + else + { + nd->op0 = gen_lowpart (nd->vmode, d->op0); + nd->op1 = gen_lowpart (nd->vmode, d->op1); + } + if (d->testing_p) + nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1); + else + nd->target = gen_reg_rtx (nd->vmode); + } + return true; +} + /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D in a single instruction. */ @@ -49372,7 +49394,7 @@ static bool expand_vec_perm_1 (struct expand_vec_perm_d *d) { unsigned i, nelt = d->nelt; - unsigned char perm2[MAX_VECT_LEN]; + struct expand_vec_perm_d nd; /* Check plain VEC_SELECT first, because AVX has instructions that could match both SEL and SEL+CONCAT, but the plain SEL will allow a memory @@ -49385,10 +49407,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) for (i = 0; i < nelt; i++) { - perm2[i] = d->perm[i] & mask; - if (perm2[i] != i) + nd.perm[i] = d->perm[i] & mask; + if (nd.perm[i] != i) identity_perm = false; - if (perm2[i]) + if (nd.perm[i]) broadcast_perm = false; } @@ -49457,7 +49479,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) } } - if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p)) + if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p)) return true; /* There are plenty of patterns in sse.md that are written for @@ -49468,10 +49490,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) every other permutation operand. */ for (i = 0; i < nelt; i += 2) { - perm2[i] = d->perm[i] & mask; - perm2[i + 1] = (d->perm[i + 1] & mask) + nelt; + nd.perm[i] = d->perm[i] & mask; + nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt; } - if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt, + if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, d->testing_p)) return true; @@ -49480,13 +49502,13 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) { for (i = 0; i < nelt; i += 4) { - perm2[i + 0] = d->perm[i + 0] & mask; - perm2[i + 1] = d->perm[i + 1] & mask; - perm2[i + 2] = (d->perm[i + 2] & mask) + nelt; - perm2[i + 3] = (d->perm[i + 3] & mask) + nelt; + nd.perm[i + 0] = d->perm[i + 0] & mask; + nd.perm[i + 1] = d->perm[i + 1] & mask; + nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt; + nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt; } - if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt, + if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, d->testing_p)) return true; } @@ -49507,10 +49529,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) e -= nelt; else e += nelt; - perm2[i] = e; + nd.perm[i] = e; } - if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt, + if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt, d->testing_p)) return true; } @@ -49536,6 +49558,14 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) return true; + /* See if we can get the same permutation in different vector integer + mode. */ + if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) + { + if (!d->testing_p) + emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); + return true; + } return false; } @@ -50968,7 +50998,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) struct expand_vec_perm_d d_copy = *d; d_copy.vmode = V4DFmode; if (d->testing_p) - d_copy.target = gen_lowpart (V4DFmode, d->target); + d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1); else d_copy.target = gen_reg_rtx (V4DFmode); d_copy.op0 = gen_lowpart (V4DFmode, d->op0); @@ -51007,7 +51037,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) struct expand_vec_perm_d d_copy = *d; d_copy.vmode = V8SFmode; if (d->testing_p) - d_copy.target = gen_lowpart (V8SFmode, d->target); + d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1); else d_copy.target = gen_reg_rtx (V8SFmode); d_copy.op0 = gen_lowpart (V8SFmode, d->op0); @@ -51451,6 +51481,16 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_vpshufb4_vpermq2 (d)) return true; + /* See if we can get the same permutation in different vector integer + mode. */ + struct expand_vec_perm_d nd; + if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) + { + if (!d->testing_p) + emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); + return true; + } + return false; } @@ -54100,6 +54140,52 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load, return true; } +/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ + +static bool +ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, + optimization_type opt_type) +{ + switch (op) + { + case asin_optab: + case acos_optab: + case log1p_optab: + case exp_optab: + case exp10_optab: + case exp2_optab: + case expm1_optab: + case ldexp_optab: + case scalb_optab: + case round_optab: + return opt_type == OPTIMIZE_FOR_SPEED; + + case rint_optab: + if (SSE_FLOAT_MODE_P (mode1) + && TARGET_SSE_MATH + && !flag_trapping_math + && !TARGET_ROUND) + return opt_type == OPTIMIZE_FOR_SPEED; + return true; + + case floor_optab: + case ceil_optab: + case btrunc_optab: + if (SSE_FLOAT_MODE_P (mode1) + && TARGET_SSE_MATH + && !flag_trapping_math + && TARGET_ROUND) + return true; + return opt_type == OPTIMIZE_FOR_SPEED; + + case rsqrt_optab: + return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (); + + default: + return true; + } +} + /* Address space support. This is not "far pointers" in the 16-bit sense, but an easy way @@ -54645,6 +54731,9 @@ ix86_addr_space_zero_address_valid (addr_space_t as) #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512 +#undef TARGET_OPTAB_SUPPORTED_P +#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" |