aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/aarch64/aarch64-protos.h | 2
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md | 14
-rw-r--r-- gcc/config/aarch64/aarch64.c | 65
-rw-r--r-- gcc/config/arm/arm-protos.h | 1
-rw-r--r-- gcc/config/arm/arm.c | 63
-rw-r--r-- gcc/config/arm/vec-common.md | 29
-rw-r--r-- gcc/config/i386/i386-protos.h | 1
-rw-r--r-- gcc/config/i386/i386.c | 181
-rw-r--r-- gcc/config/i386/sse.md | 24
-rw-r--r-- gcc/config/ia64/ia64-protos.h | 1
-rw-r--r-- gcc/config/ia64/ia64.c | 97
-rw-r--r-- gcc/config/ia64/vect.md | 13
-rw-r--r-- gcc/config/mips/loongson.md | 13
-rw-r--r-- gcc/config/mips/mips-msa.md | 13
-rw-r--r-- gcc/config/mips/mips-protos.h | 1
-rw-r--r-- gcc/config/mips/mips-ps-3d.md | 13
-rw-r--r-- gcc/config/mips/mips.c | 91
-rw-r--r-- gcc/config/powerpcspe/altivec.md | 13
-rw-r--r-- gcc/config/powerpcspe/paired.md | 13
-rw-r--r-- gcc/config/powerpcspe/powerpcspe-protos.h | 2
-rw-r--r-- gcc/config/powerpcspe/powerpcspe.c | 125
-rw-r--r-- gcc/config/powerpcspe/spe.md | 13
-rw-r--r-- gcc/config/powerpcspe/vsx.md | 13
-rw-r--r-- gcc/config/rs6000/altivec.md | 13
-rw-r--r-- gcc/config/rs6000/paired.md | 13
-rw-r--r-- gcc/config/rs6000/rs6000-protos.h | 2
-rw-r--r-- gcc/config/rs6000/rs6000.c | 123
-rw-r--r-- gcc/config/rs6000/vsx.md | 13
-rw-r--r-- gcc/config/sparc/sparc.c | 31
-rw-r--r-- gcc/config/sparc/sparc.md | 22
30 files changed, 334 insertions, 684 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 002ac330c6d..3bc3756563a 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -488,8 +488,6 @@ extern void aarch64_split_combinev16qi (rtx operands[3]);
extern void aarch64_expand_vec_perm (rtx, rtx, rtx, rtx, unsigned int);
extern bool aarch64_madd_needs_nop (rtx_insn *);
extern void aarch64_final_prescan_insn (rtx_insn *);
-extern bool
-aarch64_expand_vec_perm_const (rtx, rtx, rtx, rtx, unsigned int);
void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
int aarch64_ccmp_mode_to_code (machine_mode mode);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 84c4f8286c0..e04a9883892 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5385,20 +5385,6 @@
;; vec_perm support
-(define_expand "vec_perm_const<mode>"
- [(match_operand:VALL_F16 0 "register_operand")
- (match_operand:VALL_F16 1 "register_operand")
- (match_operand:VALL_F16 2 "register_operand")
- (match_operand:<V_INT_EQUIV> 3)]
- "TARGET_SIMD"
-{
- if (aarch64_expand_vec_perm_const (operands[0], operands[1],
- operands[2], operands[3], <nunits>))
- DONE;
- else
- FAIL;
-})
-
(define_expand "vec_perm<mode>"
[(match_operand:VB 0 "register_operand")
(match_operand:VB 1 "register_operand")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 1da313f57e0..05b82bcd615 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -143,8 +143,6 @@ static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
-static bool aarch64_vectorize_vec_perm_const_ok (machine_mode,
- vec_perm_indices);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
@@ -13670,29 +13668,27 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return false;
}
-/* Expand a vec_perm_const pattern with the operands given by TARGET,
- OP0, OP1 and SEL. NELT is the number of elements in the vector. */
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
-bool
-aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel,
- unsigned int nelt)
+static bool
+aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+ rtx op1, const vec_perm_indices &sel)
{
struct expand_vec_perm_d d;
unsigned int i, which;
+ d.vmode = vmode;
d.target = target;
d.op0 = op0;
d.op1 = op1;
+ d.testing_p = !target;
- d.vmode = GET_MODE (target);
- gcc_assert (VECTOR_MODE_P (d.vmode));
- d.testing_p = false;
-
+ /* Calculate whether all elements are in one vector. */
+ unsigned int nelt = sel.length ();
d.perm.reserve (nelt);
for (i = which = 0; i < nelt; ++i)
{
- rtx e = XVECEXP (sel, 0, i);
- unsigned int ei = INTVAL (e) & (2 * nelt - 1);
+ unsigned int ei = sel[i] & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
d.perm.quick_push (ei);
}
@@ -13704,7 +13700,7 @@ aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel,
case 3:
d.one_vector_p = false;
- if (!rtx_equal_p (op0, op1))
+ if (d.testing_p || !rtx_equal_p (op0, op1))
break;
/* The elements of PERM do not suggest that only the first operand
@@ -13725,37 +13721,8 @@ aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel,
break;
}
- return aarch64_expand_vec_perm_const_1 (&d);
-}
-
-static bool
-aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
-{
- struct expand_vec_perm_d d;
- unsigned int i, nelt, which;
- bool ret;
-
- d.vmode = vmode;
- d.testing_p = true;
- d.perm.safe_splice (sel);
-
- /* Calculate whether all elements are in one vector. */
- nelt = sel.length ();
- for (i = which = 0; i < nelt; ++i)
- {
- unsigned int e = d.perm[i];
- gcc_assert (e < 2 * nelt);
- which |= (e < nelt ? 1 : 2);
- }
-
- /* If all elements are from the second vector, reindex as if from the
- first vector. */
- if (which == 2)
- for (i = 0; i < nelt; ++i)
- d.perm[i] -= nelt;
-
- /* Check whether the mask can be applied to a single vector. */
- d.one_vector_p = (which != 3);
+ if (!d.testing_p)
+ return aarch64_expand_vec_perm_const_1 (&d);
d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
@@ -13763,7 +13730,7 @@ aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
start_sequence ();
- ret = aarch64_expand_vec_perm_const_1 (&d);
+ bool ret = aarch64_expand_vec_perm_const_1 (&d);
end_sequence ();
return ret;
@@ -15515,9 +15482,9 @@ aarch64_libgcc_floating_mode_supported_p
/* vec_perm support. */
-#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
-#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
- aarch64_vectorize_vec_perm_const_ok
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST \
+ aarch64_vectorize_vec_perm_const
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 0c977429c12..24a4ab870c7 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -357,7 +357,6 @@ extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
extern bool arm_gen_setmem (rtx *);
extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
-extern bool arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 2aa64917e4c..2173d95dd6d 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -290,7 +290,8 @@ static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);
-static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
+static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
+ const vec_perm_indices &);
static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
@@ -736,9 +737,8 @@ static const struct attribute_spec arm_attribute_table[] =
#define TARGET_PREFERRED_RENAME_CLASS \
arm_preferred_rename_class
-#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
-#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
- arm_vectorize_vec_perm_const_ok
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
@@ -29383,28 +29383,31 @@ arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return false;
}
-/* Expand a vec_perm_const pattern. */
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
-bool
-arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
+static bool
+arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
struct expand_vec_perm_d d;
int i, nelt, which;
+ if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
+ return false;
+
d.target = target;
d.op0 = op0;
d.op1 = op1;
- d.vmode = GET_MODE (target);
+ d.vmode = vmode;
gcc_assert (VECTOR_MODE_P (d.vmode));
- d.testing_p = false;
+ d.testing_p = !target;
nelt = GET_MODE_NUNITS (d.vmode);
d.perm.reserve (nelt);
for (i = which = 0; i < nelt; ++i)
{
- rtx e = XVECEXP (sel, 0, i);
- int ei = INTVAL (e) & (2 * nelt - 1);
+ int ei = sel[i] & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
d.perm.quick_push (ei);
}
@@ -29416,7 +29419,7 @@ arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
case 3:
d.one_vector_p = false;
- if (!rtx_equal_p (op0, op1))
+ if (d.testing_p || !rtx_equal_p (op0, op1))
break;
/* The elements of PERM do not suggest that only the first operand
@@ -29437,38 +29440,8 @@ arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
break;
}
- return arm_expand_vec_perm_const_1 (&d);
-}
-
-/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
-
-static bool
-arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
-{
- struct expand_vec_perm_d d;
- unsigned int i, nelt, which;
- bool ret;
-
- d.vmode = vmode;
- d.testing_p = true;
- d.perm.safe_splice (sel);
-
- /* Categorize the set of elements in the selector. */
- nelt = GET_MODE_NUNITS (d.vmode);
- for (i = which = 0; i < nelt; ++i)
- {
- unsigned int e = d.perm[i];
- gcc_assert (e < 2 * nelt);
- which |= (e < nelt ? 1 : 2);
- }
-
- /* For all elements from second vector, fold the elements to first. */
- if (which == 2)
- for (i = 0; i < nelt; ++i)
- d.perm[i] -= nelt;
-
- /* Check whether the mask can be applied to the vector type. */
- d.one_vector_p = (which != 3);
+ /* Real expansion emits directly; only a test query falls through to the scratch-register dry run below. */
+ if (!d.testing_p)
+ return arm_expand_vec_perm_const_1 (&d);
d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
@@ -29476,7 +29449,7 @@ arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
start_sequence ();
- ret = arm_expand_vec_perm_const_1 (&d);
+ bool ret = arm_expand_vec_perm_const_1 (&d);
end_sequence ();
return ret;
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 813341b157f..20ae24fed56 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -109,35 +109,6 @@
{
})
-(define_expand "vec_perm_const<mode>"
- [(match_operand:VALL 0 "s_register_operand" "")
- (match_operand:VALL 1 "s_register_operand" "")
- (match_operand:VALL 2 "s_register_operand" "")
- (match_operand:<V_cmp_result> 3 "" "")]
- "TARGET_NEON
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
-{
- if (arm_expand_vec_perm_const (operands[0], operands[1],
- operands[2], operands[3]))
- DONE;
- else
- FAIL;
-})
-
-(define_expand "vec_perm_const<mode>"
- [(match_operand:VH 0 "s_register_operand")
- (match_operand:VH 1 "s_register_operand")
- (match_operand:VH 2 "s_register_operand")
- (match_operand:<V_cmp_result> 3)]
- "TARGET_NEON"
-{
- if (arm_expand_vec_perm_const (operands[0], operands[1],
- operands[2], operands[3]))
- DONE;
- else
- FAIL;
-})
-
(define_expand "vec_perm<mode>"
[(match_operand:VE 0 "s_register_operand" "")
(match_operand:VE 1 "s_register_operand" "")
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index f5755f0d363..287b0198589 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -133,7 +133,6 @@ extern bool ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
extern void ix86_expand_vec_perm (rtx[]);
-extern bool ix86_expand_vec_perm_const (rtx[]);
extern bool ix86_expand_mask_vec_cmp (rtx[]);
extern bool ix86_expand_int_vec_cmp (rtx[]);
extern bool ix86_expand_fp_vec_cmp (rtx[]);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9ff9ca4e37f..1acb2c6ab83 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -47605,9 +47605,8 @@ expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
return true;
}
-/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
- With all of the interface bits taken care of, perform the expansion
- in D and return true on success. */
+/* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits
+ taken care of, perform the expansion in D and return true on success. */
static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
@@ -47742,69 +47741,29 @@ canonicalize_perm (struct expand_vec_perm_d *d)
return (which == 3);
}
-bool
-ix86_expand_vec_perm_const (rtx operands[4])
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
+
+static bool
+ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+ rtx op1, const vec_perm_indices &sel)
{
struct expand_vec_perm_d d;
unsigned char perm[MAX_VECT_LEN];
- int i, nelt;
+ unsigned int i, nelt, which;
bool two_args;
- rtx sel;
- d.target = operands[0];
- d.op0 = operands[1];
- d.op1 = operands[2];
- sel = operands[3];
+ d.target = target;
+ d.op0 = op0;
+ d.op1 = op1;
- d.vmode = GET_MODE (d.target);
+ d.vmode = vmode;
gcc_assert (VECTOR_MODE_P (d.vmode));
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
- d.testing_p = false;
+ d.testing_p = !target;
- gcc_assert (GET_CODE (sel) == CONST_VECTOR);
- gcc_assert (XVECLEN (sel, 0) == nelt);
+ gcc_assert (sel.length () == nelt);
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
- for (i = 0; i < nelt; ++i)
- {
- rtx e = XVECEXP (sel, 0, i);
- int ei = INTVAL (e) & (2 * nelt - 1);
- d.perm[i] = ei;
- perm[i] = ei;
- }
-
- two_args = canonicalize_perm (&d);
-
- if (ix86_expand_vec_perm_const_1 (&d))
- return true;
-
- /* If the selector says both arguments are needed, but the operands are the
- same, the above tried to expand with one_operand_p and flattened selector.
- If that didn't work, retry without one_operand_p; we succeeded with that
- during testing. */
- if (two_args && d.one_operand_p)
- {
- d.one_operand_p = false;
- memcpy (d.perm, perm, sizeof (perm));
- return ix86_expand_vec_perm_const_1 (&d);
- }
-
- return false;
-}
-
-/* Implement targetm.vectorize.vec_perm_const_ok. */
-
-static bool
-ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
-{
- struct expand_vec_perm_d d;
- unsigned int i, nelt, which;
- bool ret;
-
- d.vmode = vmode;
- d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
- d.testing_p = true;
-
/* Given sufficient ISA support we can just return true here
for selected vector modes. */
switch (d.vmode)
@@ -47813,17 +47772,23 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
case E_V16SImode:
case E_V8DImode:
case E_V8DFmode:
- if (TARGET_AVX512F)
- /* All implementable with a single vperm[it]2 insn. */
+ if (!TARGET_AVX512F)
+ return false;
+ /* All implementable with a single vperm[it]2 insn. */
+ if (d.testing_p)
return true;
break;
case E_V32HImode:
- if (TARGET_AVX512BW)
+ if (!TARGET_AVX512BW)
+ return false;
+ if (d.testing_p)
/* All implementable with a single vperm[it]2 insn. */
return true;
break;
case E_V64QImode:
- if (TARGET_AVX512BW)
+ if (!TARGET_AVX512BW)
+ return false;
+ if (d.testing_p)
/* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */
return true;
break;
@@ -47831,73 +47796,108 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
case E_V8SFmode:
case E_V4DFmode:
case E_V4DImode:
- if (TARGET_AVX512VL)
+ if (!TARGET_AVX)
+ return false;
+ if (d.testing_p && TARGET_AVX512VL)
/* All implementable with a single vperm[it]2 insn. */
return true;
break;
case E_V16HImode:
- if (TARGET_AVX2)
+ if (!TARGET_SSE2)
+ return false;
+ if (d.testing_p && TARGET_AVX2)
/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
return true;
break;
case E_V32QImode:
- if (TARGET_AVX2)
+ if (!TARGET_SSE2)
+ return false;
+ if (d.testing_p && TARGET_AVX2)
/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
return true;
break;
- case E_V4SImode:
- case E_V4SFmode:
case E_V8HImode:
case E_V16QImode:
+ if (!TARGET_SSE2)
+ return false;
+ /* Fall through. */
+ case E_V4SImode:
+ case E_V4SFmode:
+ if (!TARGET_SSE)
+ return false;
/* All implementable with a single vpperm insn. */
- if (TARGET_XOP)
+ if (d.testing_p && TARGET_XOP)
return true;
/* All implementable with 2 pshufb + 1 ior. */
- if (TARGET_SSSE3)
+ if (d.testing_p && TARGET_SSSE3)
return true;
break;
case E_V2DImode:
case E_V2DFmode:
+ if (!TARGET_SSE)
+ return false;
/* All implementable with shufpd or unpck[lh]pd. */
- return true;
+ if (d.testing_p)
+ return true;
+ break;
default:
return false;
}
- /* Extract the values from the vector CST into the permutation
- array in D. */
for (i = which = 0; i < nelt; ++i)
{
unsigned char e = sel[i];
gcc_assert (e < 2 * nelt);
d.perm[i] = e;
+ perm[i] = e;
which |= (e < nelt ? 1 : 2);
}
- /* For all elements from second vector, fold the elements to first. */
- if (which == 2)
- for (i = 0; i < nelt; ++i)
- d.perm[i] -= nelt;
+ if (d.testing_p)
+ {
+ /* For all elements from second vector, fold the elements to first. */
+ if (which == 2)
+ for (i = 0; i < nelt; ++i)
+ d.perm[i] -= nelt;
- /* Check whether the mask can be applied to the vector type. */
- d.one_operand_p = (which != 3);
+ /* Check whether the mask can be applied to the vector type. */
+ d.one_operand_p = (which != 3);
- /* Implementable with shufps or pshufd. */
- if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
- return true;
+ /* Implementable with shufps or pshufd. */
+ if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
+ return true;
- /* Otherwise we have to go through the motions and see if we can
- figure out how to generate the requested permutation. */
- d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
- d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
- if (!d.one_operand_p)
- d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+ /* Otherwise we have to go through the motions and see if we can
+ figure out how to generate the requested permutation. */
+ d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
+ d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+ if (!d.one_operand_p)
+ d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
- start_sequence ();
- ret = ix86_expand_vec_perm_const_1 (&d);
- end_sequence ();
+ start_sequence ();
+ bool ret = ix86_expand_vec_perm_const_1 (&d);
+ end_sequence ();
- return ret;
+ return ret;
+ }
+
+ two_args = canonicalize_perm (&d);
+
+ if (ix86_expand_vec_perm_const_1 (&d))
+ return true;
+
+ /* If the selector says both arguments are needed, but the operands are the
+ same, the above tried to expand with one_operand_p and flattened selector.
+ If that didn't work, retry without one_operand_p; we succeeded with that
+ during testing. */
+ if (two_args && d.one_operand_p)
+ {
+ d.one_operand_p = false;
+ memcpy (d.perm, perm, sizeof (perm));
+ return ix86_expand_vec_perm_const_1 (&d);
+ }
+
+ return false;
}
void
@@ -50549,9 +50549,8 @@ ix86_run_selftests (void)
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
ix86_builtin_vectorization_cost
-#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
-#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
- ix86_vectorize_vec_perm_const_ok
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
ix86_preferred_simd_mode
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 105b5cf6092..76c150fe8ec 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11498,30 +11498,6 @@
DONE;
})
-(define_mode_iterator VEC_PERM_CONST
- [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
- (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
- (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
- (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
- (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
- (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
- (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
- (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
- (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
-
-(define_expand "vec_perm_const<mode>"
- [(match_operand:VEC_PERM_CONST 0 "register_operand")
- (match_operand:VEC_PERM_CONST 1 "register_operand")
- (match_operand:VEC_PERM_CONST 2 "register_operand")
- (match_operand:<sseintvecmode> 3)]
- ""
-{
- if (ix86_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel bitwise logical operations
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
index cbabbd3b757..71e55e47557 100644
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -62,7 +62,6 @@ extern const char *get_bundle_name (int);
extern const char *output_probe_stack_range (rtx, rtx);
extern void ia64_expand_vec_perm_even_odd (rtx, rtx, rtx, int);
-extern bool ia64_expand_vec_perm_const (rtx op[4]);
extern void ia64_expand_vec_setv2sf (rtx op[3]);
#endif /* RTX_CODE */
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index f99bea98d21..d2ce1a49fb9 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -335,7 +335,8 @@ static fixed_size_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
bool, bool);
-static bool ia64_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
+static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
+ const vec_perm_indices &);
static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
@@ -654,8 +655,8 @@ static const struct attribute_spec ia64_attribute_table[] =
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true
-#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
-#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
@@ -11743,32 +11744,31 @@ ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return false;
}
-bool
-ia64_expand_vec_perm_const (rtx operands[4])
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
+
+static bool
+ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+ rtx op1, const vec_perm_indices &sel)
{
struct expand_vec_perm_d d;
unsigned char perm[MAX_VECT_LEN];
- int i, nelt, which;
- rtx sel;
+ unsigned int i, nelt, which;
- d.target = operands[0];
- d.op0 = operands[1];
- d.op1 = operands[2];
- sel = operands[3];
+ d.target = target;
+ d.op0 = op0;
+ d.op1 = op1;
- d.vmode = GET_MODE (d.target);
+ d.vmode = vmode;
gcc_assert (VECTOR_MODE_P (d.vmode));
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
- d.testing_p = false;
+ d.testing_p = !target;
- gcc_assert (GET_CODE (sel) == CONST_VECTOR);
- gcc_assert (XVECLEN (sel, 0) == nelt);
+ gcc_assert (sel.length () == nelt);
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
for (i = which = 0; i < nelt; ++i)
{
- rtx e = XVECEXP (sel, 0, i);
- int ei = INTVAL (e) & (2 * nelt - 1);
+ unsigned int ei = sel[i] & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
d.perm[i] = ei;
@@ -11781,7 +11781,7 @@ ia64_expand_vec_perm_const (rtx operands[4])
gcc_unreachable();
case 3:
- if (!rtx_equal_p (d.op0, d.op1))
+ if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
{
d.one_operand_p = false;
break;
@@ -11809,6 +11809,22 @@ ia64_expand_vec_perm_const (rtx operands[4])
break;
}
+ if (d.testing_p)
+ {
+ /* We have to go through the motions and see if we can
+ figure out how to generate the requested permutation. */
+ d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
+ d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+ if (!d.one_operand_p)
+ d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+
+ start_sequence ();
+ bool ret = ia64_expand_vec_perm_const_1 (&d);
+ end_sequence ();
+
+ return ret;
+ }
+
if (ia64_expand_vec_perm_const_1 (&d))
return true;
@@ -11825,51 +11841,6 @@ ia64_expand_vec_perm_const (rtx operands[4])
return false;
}
-/* Implement targetm.vectorize.vec_perm_const_ok. */
-
-static bool
-ia64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
-{
- struct expand_vec_perm_d d;
- unsigned int i, nelt, which;
- bool ret;
-
- d.vmode = vmode;
- d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
- d.testing_p = true;
-
- /* Extract the values from the vector CST into the permutation
- array in D. */
- for (i = which = 0; i < nelt; ++i)
- {
- unsigned char e = sel[i];
- d.perm[i] = e;
- gcc_assert (e < 2 * nelt);
- which |= (e < nelt ? 1 : 2);
- }
-
- /* For all elements from second vector, fold the elements to first. */
- if (which == 2)
- for (i = 0; i < nelt; ++i)
- d.perm[i] -= nelt;
-
- /* Check whether the mask can be applied to the vector type. */
- d.one_operand_p = (which != 3);
-
- /* Otherwise we have to go through the motions and see if we can
- figure out how to generate the requested permutation. */
- d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
- d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
- if (!d.one_operand_p)
- d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
-
- start_sequence ();
- ret = ia64_expand_vec_perm_const_1 (&d);
- end_sequence ();
-
- return ret;
-}
-
void
ia64_expand_vec_setv2sf (rtx operands[3])
{
diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md
index 20e260ccfba..68ac05c0e8a 100644
--- a/gcc/config/ia64/vect.md
+++ b/gcc/config/ia64/vect.md
@@ -1549,19 +1549,6 @@
DONE;
})
-(define_expand "vec_perm_const<mode>"
- [(match_operand:VEC 0 "register_operand" "")
- (match_operand:VEC 1 "register_operand" "")
- (match_operand:VEC 2 "register_operand" "")
- (match_operand:<vecint> 3 "" "")]
- ""
-{
- if (ia64_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
;; Missing operations
;; fprcpa
;; fpsqrta
diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md
index b48dfa0dc71..c75ce2cca5b 100644
--- a/gcc/config/mips/loongson.md
+++ b/gcc/config/mips/loongson.md
@@ -784,19 +784,6 @@
"punpcklwd\t%0,%1,%2"
[(set_attr "type" "fcvt")])
-(define_expand "vec_perm_const<mode>"
- [(match_operand:VWHB 0 "register_operand" "")
- (match_operand:VWHB 1 "register_operand" "")
- (match_operand:VWHB 2 "register_operand" "")
- (match_operand:VWHB 3 "" "")]
- "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS"
-{
- if (mips_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
(define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<V_stretch_half> 0 "register_operand" "")
(match_operand:VHB 1 "register_operand" "")]
diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index 87d889d7296..73f38d3f5d4 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -558,19 +558,6 @@
[(set_attr "type" "simd_copy")
(set_attr "mode" "<MODE>")])
-(define_expand "vec_perm_const<mode>"
- [(match_operand:MSA 0 "register_operand")
- (match_operand:MSA 1 "register_operand")
- (match_operand:MSA 2 "register_operand")
- (match_operand:<VIMODE> 3 "")]
- "ISA_HAS_MSA"
-{
- if (mips_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
(define_expand "abs<mode>2"
[(match_operand:IMSA 0 "register_operand" "=f")
(abs:IMSA (match_operand:IMSA 1 "register_operand" "f"))]
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 1c4167a836a..8eab7c58114 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -348,7 +348,6 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs,
rtx, rtx, rtx, rtx);
extern void mips_expand_vector_init (rtx, rtx);
-extern bool mips_expand_vec_perm_const (rtx op[4]);
extern void mips_expand_vec_unpack (rtx op[2], bool, bool);
extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx));
extern void mips_expand_vec_minmax (rtx, rtx, rtx,
diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md
index 81820b13b11..05d58e9968f 100644
--- a/gcc/config/mips/mips-ps-3d.md
+++ b/gcc/config/mips/mips-ps-3d.md
@@ -164,19 +164,6 @@
[(set_attr "type" "fmove")
(set_attr "mode" "SF")])
-(define_expand "vec_perm_constv2sf"
- [(match_operand:V2SF 0 "register_operand" "")
- (match_operand:V2SF 1 "register_operand" "")
- (match_operand:V2SF 2 "register_operand" "")
- (match_operand:V2SI 3 "" "")]
- "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT"
-{
- if (mips_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
;; Expanders for builtins. The instruction:
;;
;; P[UL][UL].PS <result>, <a>, <b>
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 8f2f6e09824..966e7ce0891 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -21379,34 +21379,32 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return false;
}
-/* Expand a vec_perm_const pattern. */
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
-bool
-mips_expand_vec_perm_const (rtx operands[4])
+static bool
+mips_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+ rtx op1, const vec_perm_indices &sel)
{
struct expand_vec_perm_d d;
int i, nelt, which;
unsigned char orig_perm[MAX_VECT_LEN];
- rtx sel;
bool ok;
- d.target = operands[0];
- d.op0 = operands[1];
- d.op1 = operands[2];
- sel = operands[3];
+ d.target = target;
+ d.op0 = op0;
+ d.op1 = op1;
- d.vmode = GET_MODE (d.target);
- gcc_assert (VECTOR_MODE_P (d.vmode));
- d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
- d.testing_p = false;
+ d.vmode = vmode;
+ gcc_assert (VECTOR_MODE_P (vmode));
+ d.nelt = nelt = GET_MODE_NUNITS (vmode);
+ d.testing_p = !target;
/* This is overly conservative, but ensures we don't get an
uninitialized warning on ORIG_PERM. */
memset (orig_perm, 0, MAX_VECT_LEN);
for (i = which = 0; i < nelt; ++i)
{
- rtx e = XVECEXP (sel, 0, i);
- int ei = INTVAL (e) & (2 * nelt - 1);
+ int ei = sel[i] & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
orig_perm[i] = ei;
}
@@ -21419,7 +21417,7 @@ mips_expand_vec_perm_const (rtx operands[4])
case 3:
d.one_vector_p = false;
- if (!rtx_equal_p (d.op0, d.op1))
+ if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
break;
/* FALLTHRU */
@@ -21436,6 +21434,19 @@ mips_expand_vec_perm_const (rtx operands[4])
break;
}
+ if (d.testing_p)
+ {
+ d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
+ d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+ if (!d.one_vector_p)
+ d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+
+ start_sequence ();
+ ok = mips_expand_vec_perm_const_1 (&d);
+ end_sequence ();
+ return ok;
+ }
+
ok = mips_expand_vec_perm_const_1 (&d);
/* If we were given a two-vector permutation which just happened to
@@ -21447,8 +21458,8 @@ mips_expand_vec_perm_const (rtx operands[4])
the original permutation. */
if (!ok && which == 3)
{
- d.op0 = operands[1];
- d.op1 = operands[2];
+ d.op0 = op0;
+ d.op1 = op1;
d.one_vector_p = false;
memcpy (d.perm, orig_perm, MAX_VECT_LEN);
ok = mips_expand_vec_perm_const_1 (&d);
@@ -21468,48 +21479,6 @@ mips_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
return 1;
}
-/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
-
-static bool
-mips_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
-{
- struct expand_vec_perm_d d;
- unsigned int i, nelt, which;
- bool ret;
-
- d.vmode = vmode;
- d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
- d.testing_p = true;
-
- /* Categorize the set of elements in the selector. */
- for (i = which = 0; i < nelt; ++i)
- {
- unsigned char e = sel[i];
- d.perm[i] = e;
- gcc_assert (e < 2 * nelt);
- which |= (e < nelt ? 1 : 2);
- }
-
- /* For all elements from second vector, fold the elements to first. */
- if (which == 2)
- for (i = 0; i < nelt; ++i)
- d.perm[i] -= nelt;
-
- /* Check whether the mask can be applied to the vector type. */
- d.one_vector_p = (which != 3);
-
- d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
- d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
- if (!d.one_vector_p)
- d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
-
- start_sequence ();
- ret = mips_expand_vec_perm_const_1 (&d);
- end_sequence ();
-
- return ret;
-}
-
/* Expand an integral vector unpack operation. */
void
@@ -22591,8 +22560,8 @@ mips_starting_frame_offset (void)
#undef TARGET_PREPARE_PCH_SAVE
#define TARGET_PREPARE_PCH_SAVE mips_prepare_pch_save
-#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
-#define TARGET_VECTORIZE_VEC_PERM_CONST_OK mips_vectorize_vec_perm_const_ok
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST mips_vectorize_vec_perm_const
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH mips_sched_reassociation_width
diff --git a/gcc/config/powerpcspe/altivec.md b/gcc/config/powerpcspe/altivec.md
index 81373f581d1..2f85e369c3e 100644
--- a/gcc/config/powerpcspe/altivec.md
+++ b/gcc/config/powerpcspe/altivec.md
@@ -2080,19 +2080,6 @@
}
})
-(define_expand "vec_perm_constv16qi"
- [(match_operand:V16QI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")
- (match_operand:V16QI 2 "register_operand" "")
- (match_operand:V16QI 3 "" "")]
- "TARGET_ALTIVEC"
-{
- if (altivec_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
(define_insn "*altivec_vpermr_<mode>_internal"
[(set (match_operand:VM 0 "register_operand" "=v,?wo")
(unspec:VM [(match_operand:VM 1 "register_operand" "v,wo")
diff --git a/gcc/config/powerpcspe/paired.md b/gcc/config/powerpcspe/paired.md
index e12f07fc9b8..e950e465861 100644
--- a/gcc/config/powerpcspe/paired.md
+++ b/gcc/config/powerpcspe/paired.md
@@ -313,19 +313,6 @@
"ps_merge11 %0, %1, %2"
[(set_attr "type" "fp")])
-(define_expand "vec_perm_constv2sf"
- [(match_operand:V2SF 0 "gpc_reg_operand" "")
- (match_operand:V2SF 1 "gpc_reg_operand" "")
- (match_operand:V2SF 2 "gpc_reg_operand" "")
- (match_operand:V2SI 3 "" "")]
- "TARGET_PAIRED_FLOAT"
-{
- if (rs6000_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
(define_insn "paired_sum0"
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
(vec_concat:V2SF (plus:SF (vec_select:SF
diff --git a/gcc/config/powerpcspe/powerpcspe-protos.h b/gcc/config/powerpcspe/powerpcspe-protos.h
index 78baeecad38..b9baae8a680 100644
--- a/gcc/config/powerpcspe/powerpcspe-protos.h
+++ b/gcc/config/powerpcspe/powerpcspe-protos.h
@@ -64,9 +64,7 @@ extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
extern void rs6000_split_v4si_init (rtx []);
-extern bool altivec_expand_vec_perm_const (rtx op[4]);
extern void altivec_expand_vec_perm_le (rtx op[4]);
-extern bool rs6000_expand_vec_perm_const (rtx op[4]);
extern void altivec_expand_lvx_be (rtx, rtx, machine_mode, unsigned);
extern void altivec_expand_stvx_be (rtx, rtx, machine_mode, unsigned);
extern void altivec_expand_stvex_be (rtx, rtx, machine_mode, unsigned);
diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c
index bf90cc5cd7d..9133125a3ea 100644
--- a/gcc/config/powerpcspe/powerpcspe.c
+++ b/gcc/config/powerpcspe/powerpcspe.c
@@ -1938,8 +1938,8 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
-#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
-#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
@@ -38313,6 +38313,9 @@ rs6000_emit_parity (rtx dst, rtx src)
}
/* Expand an Altivec constant permutation for little endian mode.
+ OP0 and OP1 are the input vectors and TARGET is the output vector.
+ SEL specifies the constant permutation vector.
+
There are two issues: First, the two input operands must be
swapped so that together they form a double-wide array in LE
order. Second, the vperm instruction has surprising behavior
@@ -38354,22 +38357,18 @@ rs6000_emit_parity (rtx dst, rtx src)
vr9 = 00000006 00000004 00000002 00000000. */
-void
-altivec_expand_vec_perm_const_le (rtx operands[4])
+static void
+altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
unsigned int i;
rtx perm[16];
rtx constv, unspec;
- rtx target = operands[0];
- rtx op0 = operands[1];
- rtx op1 = operands[2];
- rtx sel = operands[3];
/* Unpack and adjust the constant selector. */
for (i = 0; i < 16; ++i)
{
- rtx e = XVECEXP (sel, 0, i);
- unsigned int elt = 31 - (INTVAL (e) & 31);
+ unsigned int elt = 31 - (sel[i] & 31);
perm[i] = GEN_INT (elt);
}
@@ -38451,10 +38450,14 @@ altivec_expand_vec_perm_le (rtx operands[4])
}
/* Expand an Altivec constant permutation. Return true if we match
- an efficient implementation; false to fall back to VPERM. */
+ an efficient implementation; false to fall back to VPERM.
-bool
-altivec_expand_vec_perm_const (rtx operands[4])
+ OP0 and OP1 are the input vectors and TARGET is the output vector.
+ SEL specifies the constant permutation vector. */
+
+static bool
+altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
struct altivec_perm_insn {
HOST_WIDE_INT mask;
@@ -38498,19 +38501,13 @@ altivec_expand_vec_perm_const (rtx operands[4])
unsigned int i, j, elt, which;
unsigned char perm[16];
- rtx target, op0, op1, sel, x;
+ rtx x;
bool one_vec;
- target = operands[0];
- op0 = operands[1];
- op1 = operands[2];
- sel = operands[3];
-
/* Unpack the constant selector. */
for (i = which = 0; i < 16; ++i)
{
- rtx e = XVECEXP (sel, 0, i);
- elt = INTVAL (e) & 31;
+ elt = sel[i] & 31;
which |= (elt < 16 ? 1 : 2);
perm[i] = elt;
}
@@ -38666,7 +38663,7 @@ altivec_expand_vec_perm_const (rtx operands[4])
if (!BYTES_BIG_ENDIAN)
{
- altivec_expand_vec_perm_const_le (operands);
+ altivec_expand_vec_perm_const_le (target, op0, op1, sel);
return true;
}
@@ -38726,60 +38723,54 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
return true;
}
-bool
-rs6000_expand_vec_perm_const (rtx operands[4])
-{
- rtx target, op0, op1, sel;
- unsigned char perm0, perm1;
-
- target = operands[0];
- op0 = operands[1];
- op1 = operands[2];
- sel = operands[3];
-
- /* Unpack the constant selector. */
- perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
- perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
-
- return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
-}
-
-/* Test whether a constant permutation is supported. */
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
static bool
-rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
+rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+ rtx op1, const vec_perm_indices &sel)
{
+ bool testing_p = !target;
+
/* AltiVec (and thus VSX) can handle arbitrary permutations. */
- if (TARGET_ALTIVEC)
+ if (TARGET_ALTIVEC && testing_p)
return true;
- /* Check for ps_merge* or evmerge* insns. */
- if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
- || (TARGET_SPE && vmode == V2SImode))
+ /* Check for ps_merge*, evmerge* or xxperm* insns. */
+ if ((vmode == V2SFmode && TARGET_PAIRED_FLOAT)
+ || (vmode == V2SImode && TARGET_SPE)
+ || ((vmode == V2DFmode || vmode == V2DImode)
+ && VECTOR_MEM_VSX_P (vmode)))
+ {
+ if (testing_p)
+ {
+ op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
+ op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
+ }
+ if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
+ return true;
+ }
+
+ if (TARGET_ALTIVEC)
{
- rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
- rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
- return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
+ /* Force the target-independent code to lower to V16QImode. */
+ if (vmode != V16QImode)
+ return false;
+ if (altivec_expand_vec_perm_const (target, op0, op1, sel))
+ return true;
}
return false;
}
-/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
+/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
+ OP0 and OP1 are the input vectors and TARGET is the output vector.
+ PERM specifies the constant permutation vector. */
static void
rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
- machine_mode vmode, unsigned nelt, rtx perm[])
+ machine_mode vmode, const vec_perm_builder &perm)
{
- machine_mode imode;
- rtx x;
-
- imode = vmode;
- if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
- imode = mode_for_int_vector (vmode).require ();
-
- x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
- x = expand_vec_perm (vmode, op0, op1, x, target);
+ rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
if (x != target)
emit_move_insn (target, x);
}
@@ -38791,12 +38782,12 @@ rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
{
machine_mode vmode = GET_MODE (target);
unsigned i, nelt = GET_MODE_NUNITS (vmode);
- rtx perm[16];
+ vec_perm_builder perm (nelt);
for (i = 0; i < nelt; i++)
- perm[i] = GEN_INT (i * 2);
+ perm.quick_push (i * 2);
- rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
+ rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
/* Expand a vector interleave operation. */
@@ -38806,16 +38797,16 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
machine_mode vmode = GET_MODE (target);
unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
- rtx perm[16];
+ vec_perm_builder perm (nelt);
high = (highp ? 0 : nelt / 2);
for (i = 0; i < nelt / 2; i++)
{
- perm[i * 2] = GEN_INT (i + high);
- perm[i * 2 + 1] = GEN_INT (i + nelt + high);
+ perm.quick_push (i + high);
+ perm.quick_push (i + nelt + high);
}
- rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
+ rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
/* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
diff --git a/gcc/config/powerpcspe/spe.md b/gcc/config/powerpcspe/spe.md
index 2351152dc24..56acfdd86d0 100644
--- a/gcc/config/powerpcspe/spe.md
+++ b/gcc/config/powerpcspe/spe.md
@@ -511,19 +511,6 @@
[(set_attr "type" "vecsimple")
(set_attr "length" "4")])
-(define_expand "vec_perm_constv2si"
- [(match_operand:V2SI 0 "gpc_reg_operand" "")
- (match_operand:V2SI 1 "gpc_reg_operand" "")
- (match_operand:V2SI 2 "gpc_reg_operand" "")
- (match_operand:V2SI 3 "" "")]
- "TARGET_SPE"
-{
- if (rs6000_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
(define_expand "spe_evmergehi"
[(match_operand:V2SI 0 "register_operand" "")
(match_operand:V2SI 1 "register_operand" "")
diff --git a/gcc/config/powerpcspe/vsx.md b/gcc/config/powerpcspe/vsx.md
index b669764ce8f..794ff446b8a 100644
--- a/gcc/config/powerpcspe/vsx.md
+++ b/gcc/config/powerpcspe/vsx.md
@@ -2543,19 +2543,6 @@
}
[(set_attr "type" "vecperm")])
-(define_expand "vec_perm_const<mode>"
- [(match_operand:VSX_D 0 "vsx_register_operand" "")
- (match_operand:VSX_D 1 "vsx_register_operand" "")
- (match_operand:VSX_D 2 "vsx_register_operand" "")
- (match_operand:V2DI 3 "" "")]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
- if (rs6000_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
;; Extraction of a single element in a small integer vector. Until ISA 3.0,
;; none of the small types were allowed in a vector register, so we had to
;; extract to a DImode and either do a direct move or store.
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 7122f99bffd..5d10c829103 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2198,19 +2198,6 @@
}
})
-(define_expand "vec_perm_constv16qi"
- [(match_operand:V16QI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")
- (match_operand:V16QI 2 "register_operand" "")
- (match_operand:V16QI 3 "" "")]
- "TARGET_ALTIVEC"
-{
- if (altivec_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
(define_insn "*altivec_vpermr_<mode>_internal"
[(set (match_operand:VM 0 "register_operand" "=v,?wo")
(unspec:VM [(match_operand:VM 1 "register_operand" "v,wo")
diff --git a/gcc/config/rs6000/paired.md b/gcc/config/rs6000/paired.md
index b0aa329d7b8..584a791d431 100644
--- a/gcc/config/rs6000/paired.md
+++ b/gcc/config/rs6000/paired.md
@@ -313,19 +313,6 @@
"ps_merge11 %0, %1, %2"
[(set_attr "type" "fp")])
-(define_expand "vec_perm_constv2sf"
- [(match_operand:V2SF 0 "gpc_reg_operand" "")
- (match_operand:V2SF 1 "gpc_reg_operand" "")
- (match_operand:V2SF 2 "gpc_reg_operand" "")
- (match_operand:V2SI 3 "" "")]
- "TARGET_PAIRED_FLOAT"
-{
- if (rs6000_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
(define_insn "paired_sum0"
[(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
(vec_concat:V2SF (plus:SF (vec_select:SF
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 9264aa2fd26..90107ea8821 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -63,9 +63,7 @@ extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
extern void rs6000_split_v4si_init (rtx []);
-extern bool altivec_expand_vec_perm_const (rtx op[4]);
extern void altivec_expand_vec_perm_le (rtx op[4]);
-extern bool rs6000_expand_vec_perm_const (rtx op[4]);
extern void altivec_expand_lvx_be (rtx, rtx, machine_mode, unsigned);
extern void altivec_expand_stvx_be (rtx, rtx, machine_mode, unsigned);
extern void altivec_expand_stvex_be (rtx, rtx, machine_mode, unsigned);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 42704d34d98..7d10b44fff9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1910,8 +1910,8 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
-#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
-#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
@@ -35570,6 +35570,9 @@ rs6000_emit_parity (rtx dst, rtx src)
}
/* Expand an Altivec constant permutation for little endian mode.
+ OP0 and OP1 are the input vectors and TARGET is the output vector.
+ SEL specifies the constant permutation vector.
+
There are two issues: First, the two input operands must be
swapped so that together they form a double-wide array in LE
order. Second, the vperm instruction has surprising behavior
@@ -35611,22 +35614,18 @@ rs6000_emit_parity (rtx dst, rtx src)
vr9 = 00000006 00000004 00000002 00000000. */
-void
-altivec_expand_vec_perm_const_le (rtx operands[4])
+static void
+altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
unsigned int i;
rtx perm[16];
rtx constv, unspec;
- rtx target = operands[0];
- rtx op0 = operands[1];
- rtx op1 = operands[2];
- rtx sel = operands[3];
/* Unpack and adjust the constant selector. */
for (i = 0; i < 16; ++i)
{
- rtx e = XVECEXP (sel, 0, i);
- unsigned int elt = 31 - (INTVAL (e) & 31);
+ unsigned int elt = 31 - (sel[i] & 31);
perm[i] = GEN_INT (elt);
}
@@ -35708,10 +35707,14 @@ altivec_expand_vec_perm_le (rtx operands[4])
}
/* Expand an Altivec constant permutation. Return true if we match
- an efficient implementation; false to fall back to VPERM. */
+ an efficient implementation; false to fall back to VPERM.
-bool
-altivec_expand_vec_perm_const (rtx operands[4])
+ OP0 and OP1 are the input vectors and TARGET is the output vector.
+ SEL specifies the constant permutation vector. */
+
+static bool
+altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
+ const vec_perm_indices &sel)
{
struct altivec_perm_insn {
HOST_WIDE_INT mask;
@@ -35759,19 +35762,13 @@ altivec_expand_vec_perm_const (rtx operands[4])
unsigned int i, j, elt, which;
unsigned char perm[16];
- rtx target, op0, op1, sel, x;
+ rtx x;
bool one_vec;
- target = operands[0];
- op0 = operands[1];
- op1 = operands[2];
- sel = operands[3];
-
/* Unpack the constant selector. */
for (i = which = 0; i < 16; ++i)
{
- rtx e = XVECEXP (sel, 0, i);
- elt = INTVAL (e) & 31;
+ elt = sel[i] & 31;
which |= (elt < 16 ? 1 : 2);
perm[i] = elt;
}
@@ -35927,7 +35924,7 @@ altivec_expand_vec_perm_const (rtx operands[4])
if (!BYTES_BIG_ENDIAN)
{
- altivec_expand_vec_perm_const_le (operands);
+ altivec_expand_vec_perm_const_le (target, op0, op1, sel);
return true;
}
@@ -35987,59 +35984,53 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
return true;
}
-bool
-rs6000_expand_vec_perm_const (rtx operands[4])
-{
- rtx target, op0, op1, sel;
- unsigned char perm0, perm1;
-
- target = operands[0];
- op0 = operands[1];
- op1 = operands[2];
- sel = operands[3];
-
- /* Unpack the constant selector. */
- perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
- perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
-
- return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
-}
-
-/* Test whether a constant permutation is supported. */
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
static bool
-rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
+rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+ rtx op1, const vec_perm_indices &sel)
{
+ bool testing_p = !target;
+
/* AltiVec (and thus VSX) can handle arbitrary permutations. */
- if (TARGET_ALTIVEC)
+ if (TARGET_ALTIVEC && testing_p)
return true;
- /* Check for ps_merge* or evmerge* insns. */
- if (TARGET_PAIRED_FLOAT && vmode == V2SFmode)
+ /* Check for ps_merge* or xxpermdi insns. */
+ if ((vmode == V2SFmode && TARGET_PAIRED_FLOAT)
+ || ((vmode == V2DFmode || vmode == V2DImode)
+ && VECTOR_MEM_VSX_P (vmode)))
+ {
+ if (testing_p)
+ {
+ op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
+ op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
+ }
+ if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
+ return true;
+ }
+
+ if (TARGET_ALTIVEC)
{
- rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
- rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
- return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
+ /* Force the target-independent code to lower to V16QImode. */
+ if (vmode != V16QImode)
+ return false;
+ if (altivec_expand_vec_perm_const (target, op0, op1, sel))
+ return true;
}
return false;
}
-/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
+/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
+ OP0 and OP1 are the input vectors and TARGET is the output vector.
+ PERM specifies the constant permutation vector. */
static void
rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
- machine_mode vmode, unsigned nelt, rtx perm[])
+ machine_mode vmode, const vec_perm_builder &perm)
{
- machine_mode imode;
- rtx x;
-
- imode = vmode;
- if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
- imode = mode_for_int_vector (vmode).require ();
-
- x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
- x = expand_vec_perm (vmode, op0, op1, x, target);
+ rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
if (x != target)
emit_move_insn (target, x);
}
@@ -36051,12 +36042,12 @@ rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
{
machine_mode vmode = GET_MODE (target);
unsigned i, nelt = GET_MODE_NUNITS (vmode);
- rtx perm[16];
+ vec_perm_builder perm (nelt);
for (i = 0; i < nelt; i++)
- perm[i] = GEN_INT (i * 2);
+ perm.quick_push (i * 2);
- rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
+ rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
/* Expand a vector interleave operation. */
@@ -36066,16 +36057,16 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
machine_mode vmode = GET_MODE (target);
unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
- rtx perm[16];
+ vec_perm_builder perm (nelt);
high = (highp ? 0 : nelt / 2);
for (i = 0; i < nelt / 2; i++)
{
- perm[i * 2] = GEN_INT (i + high);
- perm[i * 2 + 1] = GEN_INT (i + nelt + high);
+ perm.quick_push (i + high);
+ perm.quick_push (i + nelt + high);
}
- rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
+ rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
/* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f6f2bd48363..7c6bb17a33b 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3189,19 +3189,6 @@
}
[(set_attr "type" "vecperm")])
-(define_expand "vec_perm_const<mode>"
- [(match_operand:VSX_D 0 "vsx_register_operand" "")
- (match_operand:VSX_D 1 "vsx_register_operand" "")
- (match_operand:VSX_D 2 "vsx_register_operand" "")
- (match_operand:V2DI 3 "" "")]
- "VECTOR_MEM_VSX_P (<MODE>mode)"
-{
- if (rs6000_expand_vec_perm_const (operands))
- DONE;
- else
- FAIL;
-})
-
;; Extraction of a single element in a small integer vector. Until ISA 3.0,
;; none of the small types were allowed in a vector register, so we had to
;; extract to a DImode and either do a direct move or store.
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 288264f0e8c..62bc492229e 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -688,6 +688,8 @@ static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
+static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
+ const vec_perm_indices &);
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes. */
@@ -932,6 +934,9 @@ char sparc_hard_reg_printed[8];
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Return the memory reference contained in X if any, zero otherwise. */
@@ -12813,6 +12818,32 @@ sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
+
+static bool
+sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
+ rtx op1, const vec_perm_indices &sel)
+{
+ /* A null TARGET means we are only testing; all permutes are supported. */
+ if (!target)
+ return true;
+
+ /* Force target-independent code to convert constant permutations on other
+ modes down to V8QI. Rely on this to avoid the complexity of the byte
+ order of the permutation. */
+ if (vmode != V8QImode)
+ return false;
+
+ unsigned int i, mask;
+ for (i = mask = 0; i < 8; ++i)
+ mask |= (sel[i] & 0xf) << (28 - i*4);
+ rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
+
+ emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
+ emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
+ return true;
+}
+
/* Implement TARGET_FRAME_POINTER_REQUIRED. */
static bool
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index d1af68034dd..f7dff435642 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -9327,28 +9327,6 @@ visl")
(set_attr "subtype" "other")
(set_attr "fptype" "double")])
-;; The rtl expanders will happily convert constant permutations on other
-;; modes down to V8QI. Rely on this to avoid the complexity of the byte
-;; order of the permutation.
-(define_expand "vec_perm_constv8qi"
- [(match_operand:V8QI 0 "register_operand" "")
- (match_operand:V8QI 1 "register_operand" "")
- (match_operand:V8QI 2 "register_operand" "")
- (match_operand:V8QI 3 "" "")]
- "TARGET_VIS2"
-{
- unsigned int i, mask;
- rtx sel = operands[3];
-
- for (i = mask = 0; i < 8; ++i)
- mask |= (INTVAL (XVECEXP (sel, 0, i)) & 0xf) << (28 - i*4);
- sel = force_reg (SImode, gen_int_mode (mask, SImode));
-
- emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
- emit_insn (gen_bshufflev8qi_vis (operands[0], operands[1], operands[2]));
- DONE;
-})
-
;; Unlike constant permutation, we can vastly simplify the compression of
;; the 64-bit selector input to the 32-bit %gsr value by knowing what the
;; width of the input is.