aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/i386.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/i386.c')
-rw-r--r--gcc/config/i386/i386.c231
1 files changed, 160 insertions, 71 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 23a42734007..4247af353ac 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42676,66 +42676,36 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
return ix86_builtins[code];
}
+/* Return true if it is safe to use the rsqrt optabs to optimize
+ 1.0/sqrt. */
+
+static bool
+use_rsqrt_p ()
+{
+ return (TARGET_SSE_MATH
+ && flag_finite_math_only
+ && !flag_trapping_math
+ && flag_unsafe_math_optimizations);
+}
+
/* Returns a code for a target-specific builtin that implements
reciprocal of the function, or NULL_TREE if not available. */
static tree
-ix86_builtin_reciprocal (gcall *call)
+ix86_builtin_reciprocal (tree fndecl)
{
- if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
- && flag_finite_math_only && !flag_trapping_math
- && flag_unsafe_math_optimizations))
- return NULL_TREE;
-
- if (gimple_call_internal_p (call))
- switch (gimple_call_internal_fn (call))
- {
- tree type;
- case IFN_SQRT:
- type = TREE_TYPE (gimple_call_lhs (call));
- switch (TYPE_MODE (type))
- {
- /* Vectorized version of sqrt to rsqrt conversion. */
- case V4SFmode:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
-
- case V8SFmode:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
-
- default:
- return NULL_TREE;
- }
-
- default:
- return NULL_TREE;
- }
-
- tree fndecl = gimple_call_fndecl (call);
- if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
- /* Machine dependent builtins. */
- switch (DECL_FUNCTION_CODE (fndecl))
- {
- /* Vectorized version of sqrt to rsqrt conversion. */
- case IX86_BUILTIN_SQRTPS_NR:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
-
- case IX86_BUILTIN_SQRTPS_NR256:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
+ switch (DECL_FUNCTION_CODE (fndecl))
+ {
+ /* Vectorized version of sqrt to rsqrt conversion. */
+ case IX86_BUILTIN_SQRTPS_NR:
+ return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
- default:
- return NULL_TREE;
- }
- else
- /* Normal builtins. */
- switch (DECL_FUNCTION_CODE (fndecl))
- {
- /* Sqrt to rsqrt conversion. */
- case BUILT_IN_SQRTF:
- return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
+ case IX86_BUILTIN_SQRTPS_NR256:
+ return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
- default:
- return NULL_TREE;
- }
+ default:
+ return NULL_TREE;
+ }
}
/* Helper for avx_vpermilps256_operand et al. This is also used by
@@ -45267,8 +45237,9 @@ ix86_mitigate_rop (void)
COPY_HARD_REG_SET (inout_risky, input_risky);
IOR_HARD_REG_SET (inout_risky, output_risky);
- compute_bb_for_insn ();
df_note_add_problem ();
+ /* Fix up what stack-regs did. */
+ df_insn_rescan_all ();
df_analyze ();
regrename_init (true);
@@ -49365,6 +49336,57 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
return true;
}
+/* For V*[QHS]Imode permutations, check whether the same permutation
+ can be performed in a 2x, 4x or 8x wider inner mode. */
+
+static bool
+canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
+ struct expand_vec_perm_d *nd)
+{
+ int i;
+ enum machine_mode mode = VOIDmode;
+
+ switch (d->vmode)
+ {
+ case V16QImode: mode = V8HImode; break;
+ case V32QImode: mode = V16HImode; break;
+ case V64QImode: mode = V32HImode; break;
+ case V8HImode: mode = V4SImode; break;
+ case V16HImode: mode = V8SImode; break;
+ case V32HImode: mode = V16SImode; break;
+ case V4SImode: mode = V2DImode; break;
+ case V8SImode: mode = V4DImode; break;
+ case V16SImode: mode = V8DImode; break;
+ default: return false;
+ }
+ for (i = 0; i < d->nelt; i += 2)
+ if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
+ return false;
+ nd->vmode = mode;
+ nd->nelt = d->nelt / 2;
+ for (i = 0; i < nd->nelt; i++)
+ nd->perm[i] = d->perm[2 * i] / 2;
+ if (GET_MODE_INNER (mode) != DImode)
+ canonicalize_vector_int_perm (nd, nd);
+ if (nd != d)
+ {
+ nd->one_operand_p = d->one_operand_p;
+ nd->testing_p = d->testing_p;
+ if (d->op0 == d->op1)
+ nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
+ else
+ {
+ nd->op0 = gen_lowpart (nd->vmode, d->op0);
+ nd->op1 = gen_lowpart (nd->vmode, d->op1);
+ }
+ if (d->testing_p)
+ nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
+ else
+ nd->target = gen_reg_rtx (nd->vmode);
+ }
+ return true;
+}
+
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
in a single instruction. */
@@ -49372,7 +49394,7 @@ static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
unsigned i, nelt = d->nelt;
- unsigned char perm2[MAX_VECT_LEN];
+ struct expand_vec_perm_d nd;
/* Check plain VEC_SELECT first, because AVX has instructions that could
match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
@@ -49385,10 +49407,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
for (i = 0; i < nelt; i++)
{
- perm2[i] = d->perm[i] & mask;
- if (perm2[i] != i)
+ nd.perm[i] = d->perm[i] & mask;
+ if (nd.perm[i] != i)
identity_perm = false;
- if (perm2[i])
+ if (nd.perm[i])
broadcast_perm = false;
}
@@ -49457,7 +49479,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
}
}
- if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
+ if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
return true;
/* There are plenty of patterns in sse.md that are written for
@@ -49468,10 +49490,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
every other permutation operand. */
for (i = 0; i < nelt; i += 2)
{
- perm2[i] = d->perm[i] & mask;
- perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
+ nd.perm[i] = d->perm[i] & mask;
+ nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
}
- if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
+ if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
d->testing_p))
return true;
@@ -49480,13 +49502,13 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
for (i = 0; i < nelt; i += 4)
{
- perm2[i + 0] = d->perm[i + 0] & mask;
- perm2[i + 1] = d->perm[i + 1] & mask;
- perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
- perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
+ nd.perm[i + 0] = d->perm[i + 0] & mask;
+ nd.perm[i + 1] = d->perm[i + 1] & mask;
+ nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
+ nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
}
- if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
+ if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
d->testing_p))
return true;
}
@@ -49507,10 +49529,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
e -= nelt;
else
e += nelt;
- perm2[i] = e;
+ nd.perm[i] = e;
}
- if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
+ if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
d->testing_p))
return true;
}
@@ -49536,6 +49558,14 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
return true;
+ /* See if we can get the same permutation in a different vector integer
+ mode. */
+ if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
+ {
+ if (!d->testing_p)
+ emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
+ return true;
+ }
return false;
}
@@ -50968,7 +50998,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
struct expand_vec_perm_d d_copy = *d;
d_copy.vmode = V4DFmode;
if (d->testing_p)
- d_copy.target = gen_lowpart (V4DFmode, d->target);
+ d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
else
d_copy.target = gen_reg_rtx (V4DFmode);
d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
@@ -51007,7 +51037,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
struct expand_vec_perm_d d_copy = *d;
d_copy.vmode = V8SFmode;
if (d->testing_p)
- d_copy.target = gen_lowpart (V8SFmode, d->target);
+ d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
else
d_copy.target = gen_reg_rtx (V8SFmode);
d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
@@ -51451,6 +51481,16 @@ ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
if (expand_vec_perm_vpshufb4_vpermq2 (d))
return true;
+ /* See if we can get the same permutation in a different vector integer
+ mode. */
+ struct expand_vec_perm_d nd;
+ if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
+ {
+ if (!d->testing_p)
+ emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
+ return true;
+ }
+
return false;
}
@@ -54100,6 +54140,52 @@ ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
return true;
}
+/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
+
+static bool
+ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
+ optimization_type opt_type)
+{
+ switch (op)
+ {
+ case asin_optab:
+ case acos_optab:
+ case log1p_optab:
+ case exp_optab:
+ case exp10_optab:
+ case exp2_optab:
+ case expm1_optab:
+ case ldexp_optab:
+ case scalb_optab:
+ case round_optab:
+ return opt_type == OPTIMIZE_FOR_SPEED;
+
+ case rint_optab:
+ if (SSE_FLOAT_MODE_P (mode1)
+ && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && !TARGET_ROUND)
+ return opt_type == OPTIMIZE_FOR_SPEED;
+ return true;
+
+ case floor_optab:
+ case ceil_optab:
+ case btrunc_optab:
+ if (SSE_FLOAT_MODE_P (mode1)
+ && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && TARGET_ROUND)
+ return true;
+ return opt_type == OPTIMIZE_FOR_SPEED;
+
+ case rsqrt_optab:
+ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
+
+ default:
+ return true;
+ }
+}
+
/* Address space support.
This is not "far pointers" in the 16-bit sense, but an easy way
@@ -54645,6 +54731,9 @@ ix86_addr_space_zero_address_valid (addr_space_t as)
#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
+#undef TARGET_OPTAB_SUPPORTED_P
+#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"