diff options
Diffstat (limited to 'gcc/config')
30 files changed, 334 insertions, 684 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 002ac330c6d..3bc3756563a 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -488,8 +488,6 @@ extern void aarch64_split_combinev16qi (rtx operands[3]); extern void aarch64_expand_vec_perm (rtx, rtx, rtx, rtx, unsigned int); extern bool aarch64_madd_needs_nop (rtx_insn *); extern void aarch64_final_prescan_insn (rtx_insn *); -extern bool -aarch64_expand_vec_perm_const (rtx, rtx, rtx, rtx, unsigned int); void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); int aarch64_ccmp_mode_to_code (machine_mode mode); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 84c4f8286c0..e04a9883892 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -5385,20 +5385,6 @@ ;; vec_perm support -(define_expand "vec_perm_const<mode>" - [(match_operand:VALL_F16 0 "register_operand") - (match_operand:VALL_F16 1 "register_operand") - (match_operand:VALL_F16 2 "register_operand") - (match_operand:<V_INT_EQUIV> 3)] - "TARGET_SIMD" -{ - if (aarch64_expand_vec_perm_const (operands[0], operands[1], - operands[2], operands[3], <nunits>)) - DONE; - else - FAIL; -}) - (define_expand "vec_perm<mode>" [(match_operand:VB 0 "register_operand") (match_operand:VB 1 "register_operand") diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 1da313f57e0..05b82bcd615 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -143,8 +143,6 @@ static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; static void aarch64_override_options_after_change (void); static bool aarch64_vector_mode_supported_p (machine_mode); -static bool aarch64_vectorize_vec_perm_const_ok (machine_mode, - vec_perm_indices); static int aarch64_address_cost (rtx, machine_mode, addr_space_t, 
bool); static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, const_tree type, @@ -13670,29 +13668,27 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return false; } -/* Expand a vec_perm_const pattern with the operands given by TARGET, - OP0, OP1 and SEL. NELT is the number of elements in the vector. */ +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ -bool -aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel, - unsigned int nelt) +static bool +aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { struct expand_vec_perm_d d; unsigned int i, which; + d.vmode = vmode; d.target = target; d.op0 = op0; d.op1 = op1; + d.testing_p = !target; - d.vmode = GET_MODE (target); - gcc_assert (VECTOR_MODE_P (d.vmode)); - d.testing_p = false; - + /* Calculate whether all elements are in one vector. */ + unsigned int nelt = sel.length (); d.perm.reserve (nelt); for (i = which = 0; i < nelt; ++i) { - rtx e = XVECEXP (sel, 0, i); - unsigned int ei = INTVAL (e) & (2 * nelt - 1); + unsigned int ei = sel[i] & (2 * nelt - 1); which |= (ei < nelt ? 1 : 2); d.perm.quick_push (ei); } @@ -13704,7 +13700,7 @@ aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel, case 3: d.one_vector_p = false; - if (!rtx_equal_p (op0, op1)) + if (d.testing_p || !rtx_equal_p (op0, op1)) break; /* The elements of PERM do not suggest that only the first operand @@ -13725,37 +13721,8 @@ aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel, break; } - return aarch64_expand_vec_perm_const_1 (&d); -} - -static bool -aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.testing_p = true; - d.perm.safe_splice (sel); - - /* Calculate whether all elements are in one vector. 
*/ - nelt = sel.length (); - for (i = which = 0; i < nelt; ++i) - { - unsigned int e = d.perm[i]; - gcc_assert (e < 2 * nelt); - which |= (e < nelt ? 1 : 2); - } - - /* If all elements are from the second vector, reindex as if from the - first vector. */ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; - - /* Check whether the mask can be applied to a single vector. */ - d.one_vector_p = (which != 3); + if (!d.testing_p) + return aarch64_expand_vec_perm_const_1 (&d); d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); @@ -13763,7 +13730,7 @@ aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); start_sequence (); - ret = aarch64_expand_vec_perm_const_1 (&d); + bool ret = aarch64_expand_vec_perm_const_1 (&d); end_sequence (); return ret; @@ -15515,9 +15482,9 @@ aarch64_libgcc_floating_mode_supported_p /* vec_perm support. 
*/ -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ - aarch64_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST \ + aarch64_vectorize_vec_perm_const #undef TARGET_INIT_LIBFUNCS #define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 0c977429c12..24a4ab870c7 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -357,7 +357,6 @@ extern bool arm_validize_comparison (rtx *, rtx *, rtx *); extern bool arm_gen_setmem (rtx *); extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); -extern bool arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 2aa64917e4c..2173d95dd6d 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -290,7 +290,8 @@ static int arm_cortex_a5_branch_cost (bool, bool); static int arm_cortex_m_branch_cost (bool, bool); static int arm_cortex_m7_branch_cost (bool, bool); -static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices); +static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, + const vec_perm_indices &); static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*); @@ -736,9 +737,8 @@ static const struct attribute_spec arm_attribute_table[] = #define TARGET_PREFERRED_RENAME_CLASS \ arm_preferred_rename_class -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ - arm_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ @@ -29383,28 +29383,31 @@ arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return false; } -/* 
Expand a vec_perm_const pattern. */ +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ -bool -arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) +static bool +arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { struct expand_vec_perm_d d; int i, nelt, which; + if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode)) + return false; + d.target = target; d.op0 = op0; d.op1 = op1; - d.vmode = GET_MODE (target); + d.vmode = vmode; gcc_assert (VECTOR_MODE_P (d.vmode)); - d.testing_p = false; + d.testing_p = !target; nelt = GET_MODE_NUNITS (d.vmode); d.perm.reserve (nelt); for (i = which = 0; i < nelt; ++i) { - rtx e = XVECEXP (sel, 0, i); - int ei = INTVAL (e) & (2 * nelt - 1); + int ei = sel[i] & (2 * nelt - 1); which |= (ei < nelt ? 1 : 2); d.perm.quick_push (ei); } @@ -29416,7 +29419,7 @@ arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) case 3: d.one_vector_p = false; - if (!rtx_equal_p (op0, op1)) + if (d.testing_p || !rtx_equal_p (op0, op1)) break; /* The elements of PERM do not suggest that only the first operand @@ -29437,38 +29440,8 @@ arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel) break; } - return arm_expand_vec_perm_const_1 (&d); -} - -/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ - -static bool -arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.testing_p = true; - d.perm.safe_splice (sel); - - /* Categorize the set of elements in the selector. */ - nelt = GET_MODE_NUNITS (d.vmode); - for (i = which = 0; i < nelt; ++i) - { - unsigned int e = d.perm[i]; - gcc_assert (e < 2 * nelt); - which |= (e < nelt ? 1 : 2); - } - - /* For all elements from second vector, fold the elements to first. 
*/ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; - - /* Check whether the mask can be applied to the vector type. */ - d.one_vector_p = (which != 3); + if (!d.testing_p) + return arm_expand_vec_perm_const_1 (&d); d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); @@ -29476,7 +29449,7 @@ arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); start_sequence (); - ret = arm_expand_vec_perm_const_1 (&d); + bool ret = arm_expand_vec_perm_const_1 (&d); end_sequence (); return ret; diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 813341b157f..20ae24fed56 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -109,35 +109,6 @@ { }) -(define_expand "vec_perm_const<mode>" - [(match_operand:VALL 0 "s_register_operand" "") - (match_operand:VALL 1 "s_register_operand" "") - (match_operand:VALL 2 "s_register_operand" "") - (match_operand:<V_cmp_result> 3 "" "")] - "TARGET_NEON - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" -{ - if (arm_expand_vec_perm_const (operands[0], operands[1], - operands[2], operands[3])) - DONE; - else - FAIL; -}) - -(define_expand "vec_perm_const<mode>" - [(match_operand:VH 0 "s_register_operand") - (match_operand:VH 1 "s_register_operand") - (match_operand:VH 2 "s_register_operand") - (match_operand:<V_cmp_result> 3)] - "TARGET_NEON" -{ - if (arm_expand_vec_perm_const (operands[0], operands[1], - operands[2], operands[3])) - DONE; - else - FAIL; -}) - (define_expand "vec_perm<mode>" [(match_operand:VE 0 "s_register_operand" "") (match_operand:VE 1 "s_register_operand" "") diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index f5755f0d363..287b0198589 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -133,7 +133,6 @@ extern bool ix86_expand_fp_movcc 
(rtx[]); extern bool ix86_expand_fp_vcond (rtx[]); extern bool ix86_expand_int_vcond (rtx[]); extern void ix86_expand_vec_perm (rtx[]); -extern bool ix86_expand_vec_perm_const (rtx[]); extern bool ix86_expand_mask_vec_cmp (rtx[]); extern bool ix86_expand_int_vec_cmp (rtx[]); extern bool ix86_expand_fp_vec_cmp (rtx[]); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9ff9ca4e37f..1acb2c6ab83 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -47605,9 +47605,8 @@ expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d) return true; } -/* The guts of ix86_expand_vec_perm_const, also used by the ok hook. - With all of the interface bits taken care of, perform the expansion - in D and return true on success. */ +/* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits + taken care of, perform the expansion in D and return true on success. */ static bool ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) @@ -47742,69 +47741,29 @@ canonicalize_perm (struct expand_vec_perm_d *d) return (which == 3); } -bool -ix86_expand_vec_perm_const (rtx operands[4]) +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. 
*/ + +static bool +ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { struct expand_vec_perm_d d; unsigned char perm[MAX_VECT_LEN]; - int i, nelt; + unsigned int i, nelt, which; bool two_args; - rtx sel; - d.target = operands[0]; - d.op0 = operands[1]; - d.op1 = operands[2]; - sel = operands[3]; + d.target = target; + d.op0 = op0; + d.op1 = op1; - d.vmode = GET_MODE (d.target); + d.vmode = vmode; gcc_assert (VECTOR_MODE_P (d.vmode)); d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = false; + d.testing_p = !target; - gcc_assert (GET_CODE (sel) == CONST_VECTOR); - gcc_assert (XVECLEN (sel, 0) == nelt); + gcc_assert (sel.length () == nelt); gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); - for (i = 0; i < nelt; ++i) - { - rtx e = XVECEXP (sel, 0, i); - int ei = INTVAL (e) & (2 * nelt - 1); - d.perm[i] = ei; - perm[i] = ei; - } - - two_args = canonicalize_perm (&d); - - if (ix86_expand_vec_perm_const_1 (&d)) - return true; - - /* If the selector says both arguments are needed, but the operands are the - same, the above tried to expand with one_operand_p and flattened selector. - If that didn't work, retry without one_operand_p; we succeeded with that - during testing. */ - if (two_args && d.one_operand_p) - { - d.one_operand_p = false; - memcpy (d.perm, perm, sizeof (perm)); - return ix86_expand_vec_perm_const_1 (&d); - } - - return false; -} - -/* Implement targetm.vectorize.vec_perm_const_ok. */ - -static bool -ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = true; - /* Given sufficient ISA support we can just return true here for selected vector modes. 
*/ switch (d.vmode) @@ -47813,17 +47772,23 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) case E_V16SImode: case E_V8DImode: case E_V8DFmode: - if (TARGET_AVX512F) - /* All implementable with a single vperm[it]2 insn. */ + if (!TARGET_AVX512F) + return false; + /* All implementable with a single vperm[it]2 insn. */ + if (d.testing_p) return true; break; case E_V32HImode: - if (TARGET_AVX512BW) + if (!TARGET_AVX512BW) + return false; + if (d.testing_p) /* All implementable with a single vperm[it]2 insn. */ return true; break; case E_V64QImode: - if (TARGET_AVX512BW) + if (!TARGET_AVX512BW) + return false; + if (d.testing_p) /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */ return true; break; @@ -47831,73 +47796,108 @@ ix86_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) case E_V8SFmode: case E_V4DFmode: case E_V4DImode: - if (TARGET_AVX512VL) + if (!TARGET_AVX) + return false; + if (d.testing_p && TARGET_AVX512VL) /* All implementable with a single vperm[it]2 insn. */ return true; break; case E_V16HImode: - if (TARGET_AVX2) + if (!TARGET_SSE2) + return false; + if (d.testing_p && TARGET_AVX2) /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ return true; break; case E_V32QImode: - if (TARGET_AVX2) + if (!TARGET_SSE2) + return false; + if (d.testing_p && TARGET_AVX2) /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ return true; break; - case E_V4SImode: - case E_V4SFmode: case E_V8HImode: case E_V16QImode: + if (!TARGET_SSE2) + return false; + /* Fall through. */ + case E_V4SImode: + case E_V4SFmode: + if (!TARGET_SSE) + return false; /* All implementable with a single vpperm insn. */ - if (TARGET_XOP) + if (d.testing_p && TARGET_XOP) return true; /* All implementable with 2 pshufb + 1 ior. 
*/ - if (TARGET_SSSE3) + if (d.testing_p && TARGET_SSSE3) return true; break; case E_V2DImode: case E_V2DFmode: + if (!TARGET_SSE) + return false; /* All implementable with shufpd or unpck[lh]pd. */ - return true; + if (d.testing_p) + return true; + break; default: return false; } - /* Extract the values from the vector CST into the permutation - array in D. */ for (i = which = 0; i < nelt; ++i) { unsigned char e = sel[i]; gcc_assert (e < 2 * nelt); d.perm[i] = e; + perm[i] = e; which |= (e < nelt ? 1 : 2); } - /* For all elements from second vector, fold the elements to first. */ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; + if (d.testing_p) + { + /* For all elements from second vector, fold the elements to first. */ + if (which == 2) + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; - /* Check whether the mask can be applied to the vector type. */ - d.one_operand_p = (which != 3); + /* Check whether the mask can be applied to the vector type. */ + d.one_operand_p = (which != 3); - /* Implementable with shufps or pshufd. */ - if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode)) - return true; + /* Implementable with shufps or pshufd. */ + if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode)) + return true; - /* Otherwise we have to go through the motions and see if we can - figure out how to generate the requested permutation. */ - d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); - d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); - if (!d.one_operand_p) - d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + /* Otherwise we have to go through the motions and see if we can + figure out how to generate the requested permutation. 
*/ + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_operand_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); - start_sequence (); - ret = ix86_expand_vec_perm_const_1 (&d); - end_sequence (); + start_sequence (); + bool ret = ix86_expand_vec_perm_const_1 (&d); + end_sequence (); - return ret; + return ret; + } + + two_args = canonicalize_perm (&d); + + if (ix86_expand_vec_perm_const_1 (&d)) + return true; + + /* If the selector says both arguments are needed, but the operands are the + same, the above tried to expand with one_operand_p and flattened selector. + If that didn't work, retry without one_operand_p; we succeeded with that + during testing. */ + if (two_args && d.one_operand_p) + { + d.one_operand_p = false; + memcpy (d.perm, perm, sizeof (perm)); + return ix86_expand_vec_perm_const_1 (&d); + } + + return false; } void @@ -50549,9 +50549,8 @@ ix86_run_selftests (void) #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ ix86_builtin_vectorization_cost -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ - ix86_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \ ix86_preferred_simd_mode diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 105b5cf6092..76c150fe8ec 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11498,30 +11498,6 @@ DONE; }) -(define_mode_iterator VEC_PERM_CONST - [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE") - (V2DF "TARGET_SSE") (V2DI "TARGET_SSE") - (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2") - (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") - (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") - (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") - (V16SI 
"TARGET_AVX512F") (V8DI "TARGET_AVX512F") - (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F") - (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")]) - -(define_expand "vec_perm_const<mode>" - [(match_operand:VEC_PERM_CONST 0 "register_operand") - (match_operand:VEC_PERM_CONST 1 "register_operand") - (match_operand:VEC_PERM_CONST 2 "register_operand") - (match_operand:<sseintvecmode> 3)] - "" -{ - if (ix86_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel bitwise logical operations diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index cbabbd3b757..71e55e47557 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -62,7 +62,6 @@ extern const char *get_bundle_name (int); extern const char *output_probe_stack_range (rtx, rtx); extern void ia64_expand_vec_perm_even_odd (rtx, rtx, rtx, int); -extern bool ia64_expand_vec_perm_const (rtx op[4]); extern void ia64_expand_vec_setv2sf (rtx op[3]); #endif /* RTX_CODE */ diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index f99bea98d21..d2ce1a49fb9 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -335,7 +335,8 @@ static fixed_size_mode ia64_get_reg_raw_mode (int regno); static section * ia64_hpux_function_section (tree, enum node_frequency, bool, bool); -static bool ia64_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices); +static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, + const vec_perm_indices &); static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode); static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode); @@ -654,8 +655,8 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_DELAY_VARTRACK #define TARGET_DELAY_VARTRACK true -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok +#undef 
TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p @@ -11743,32 +11744,31 @@ ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return false; } -bool -ia64_expand_vec_perm_const (rtx operands[4]) +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ + +static bool +ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { struct expand_vec_perm_d d; unsigned char perm[MAX_VECT_LEN]; - int i, nelt, which; - rtx sel; + unsigned int i, nelt, which; - d.target = operands[0]; - d.op0 = operands[1]; - d.op1 = operands[2]; - sel = operands[3]; + d.target = target; + d.op0 = op0; + d.op1 = op1; - d.vmode = GET_MODE (d.target); + d.vmode = vmode; gcc_assert (VECTOR_MODE_P (d.vmode)); d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = false; + d.testing_p = !target; - gcc_assert (GET_CODE (sel) == CONST_VECTOR); - gcc_assert (XVECLEN (sel, 0) == nelt); + gcc_assert (sel.length () == nelt); gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); for (i = which = 0; i < nelt; ++i) { - rtx e = XVECEXP (sel, 0, i); - int ei = INTVAL (e) & (2 * nelt - 1); + unsigned int ei = sel[i] & (2 * nelt - 1); which |= (ei < nelt ? 1 : 2); d.perm[i] = ei; @@ -11781,7 +11781,7 @@ ia64_expand_vec_perm_const (rtx operands[4]) gcc_unreachable(); case 3: - if (!rtx_equal_p (d.op0, d.op1)) + if (d.testing_p || !rtx_equal_p (d.op0, d.op1)) { d.one_operand_p = false; break; @@ -11809,6 +11809,22 @@ ia64_expand_vec_perm_const (rtx operands[4]) break; } + if (d.testing_p) + { + /* We have to go through the motions and see if we can + figure out how to generate the requested permutation. 
*/ + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_operand_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + bool ret = ia64_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; + } + if (ia64_expand_vec_perm_const_1 (&d)) return true; @@ -11825,51 +11841,6 @@ ia64_expand_vec_perm_const (rtx operands[4]) return false; } -/* Implement targetm.vectorize.vec_perm_const_ok. */ - -static bool -ia64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = true; - - /* Extract the values from the vector CST into the permutation - array in D. */ - for (i = which = 0; i < nelt; ++i) - { - unsigned char e = sel[i]; - d.perm[i] = e; - gcc_assert (e < 2 * nelt); - which |= (e < nelt ? 1 : 2); - } - - /* For all elements from second vector, fold the elements to first. */ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; - - /* Check whether the mask can be applied to the vector type. */ - d.one_operand_p = (which != 3); - - /* Otherwise we have to go through the motions and see if we can - figure out how to generate the requested permutation. 
*/ - d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); - d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); - if (!d.one_operand_p) - d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); - - start_sequence (); - ret = ia64_expand_vec_perm_const_1 (&d); - end_sequence (); - - return ret; -} - void ia64_expand_vec_setv2sf (rtx operands[3]) { diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md index 20e260ccfba..68ac05c0e8a 100644 --- a/gcc/config/ia64/vect.md +++ b/gcc/config/ia64/vect.md @@ -1549,19 +1549,6 @@ DONE; }) -(define_expand "vec_perm_const<mode>" - [(match_operand:VEC 0 "register_operand" "") - (match_operand:VEC 1 "register_operand" "") - (match_operand:VEC 2 "register_operand" "") - (match_operand:<vecint> 3 "" "")] - "" -{ - if (ia64_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - ;; Missing operations ;; fprcpa ;; fpsqrta diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md index b48dfa0dc71..c75ce2cca5b 100644 --- a/gcc/config/mips/loongson.md +++ b/gcc/config/mips/loongson.md @@ -784,19 +784,6 @@ "punpcklwd\t%0,%1,%2" [(set_attr "type" "fcvt")]) -(define_expand "vec_perm_const<mode>" - [(match_operand:VWHB 0 "register_operand" "") - (match_operand:VWHB 1 "register_operand" "") - (match_operand:VWHB 2 "register_operand" "") - (match_operand:VWHB 3 "" "")] - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" -{ - if (mips_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_expand "vec_unpacks_lo_<mode>" [(match_operand:<V_stretch_half> 0 "register_operand" "") (match_operand:VHB 1 "register_operand" "")] diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md index 87d889d7296..73f38d3f5d4 100644 --- a/gcc/config/mips/mips-msa.md +++ b/gcc/config/mips/mips-msa.md @@ -558,19 +558,6 @@ [(set_attr "type" "simd_copy") (set_attr "mode" "<MODE>")]) -(define_expand "vec_perm_const<mode>" - [(match_operand:MSA 0 "register_operand") - (match_operand:MSA 1 
"register_operand") - (match_operand:MSA 2 "register_operand") - (match_operand:<VIMODE> 3 "")] - "ISA_HAS_MSA" -{ - if (mips_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_expand "abs<mode>2" [(match_operand:IMSA 0 "register_operand" "=f") (abs:IMSA (match_operand:IMSA 1 "register_operand" "f"))] diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h index 1c4167a836a..8eab7c58114 100644 --- a/gcc/config/mips/mips-protos.h +++ b/gcc/config/mips/mips-protos.h @@ -348,7 +348,6 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs, rtx, rtx, rtx, rtx); extern void mips_expand_vector_init (rtx, rtx); -extern bool mips_expand_vec_perm_const (rtx op[4]); extern void mips_expand_vec_unpack (rtx op[2], bool, bool); extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx)); extern void mips_expand_vec_minmax (rtx, rtx, rtx, diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md index 81820b13b11..05d58e9968f 100644 --- a/gcc/config/mips/mips-ps-3d.md +++ b/gcc/config/mips/mips-ps-3d.md @@ -164,19 +164,6 @@ [(set_attr "type" "fmove") (set_attr "mode" "SF")]) -(define_expand "vec_perm_constv2sf" - [(match_operand:V2SF 0 "register_operand" "") - (match_operand:V2SF 1 "register_operand" "") - (match_operand:V2SF 2 "register_operand" "") - (match_operand:V2SI 3 "" "")] - "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT" -{ - if (mips_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - ;; Expanders for builtins. The instruction: ;; ;; P[UL][UL].PS <result>, <a>, <b> diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 8f2f6e09824..966e7ce0891 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -21379,34 +21379,32 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return false; } -/* Expand a vec_perm_const pattern. */ +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. 
*/ -bool -mips_expand_vec_perm_const (rtx operands[4]) +static bool +mips_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { struct expand_vec_perm_d d; int i, nelt, which; unsigned char orig_perm[MAX_VECT_LEN]; - rtx sel; bool ok; - d.target = operands[0]; - d.op0 = operands[1]; - d.op1 = operands[2]; - sel = operands[3]; + d.target = target; + d.op0 = op0; + d.op1 = op1; - d.vmode = GET_MODE (d.target); - gcc_assert (VECTOR_MODE_P (d.vmode)); - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = false; + d.vmode = vmode; + gcc_assert (VECTOR_MODE_P (vmode)); + d.nelt = nelt = GET_MODE_NUNITS (vmode); + d.testing_p = !target; /* This is overly conservative, but ensures we don't get an uninitialized warning on ORIG_PERM. */ memset (orig_perm, 0, MAX_VECT_LEN); for (i = which = 0; i < nelt; ++i) { - rtx e = XVECEXP (sel, 0, i); - int ei = INTVAL (e) & (2 * nelt - 1); + int ei = sel[i] & (2 * nelt - 1); which |= (ei < nelt ? 1 : 2); orig_perm[i] = ei; } @@ -21419,7 +21417,7 @@ mips_expand_vec_perm_const (rtx operands[4]) case 3: d.one_vector_p = false; - if (!rtx_equal_p (d.op0, d.op1)) + if (d.testing_p || !rtx_equal_p (d.op0, d.op1)) break; /* FALLTHRU */ @@ -21436,6 +21434,19 @@ mips_expand_vec_perm_const (rtx operands[4]) break; } + if (d.testing_p) + { + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_vector_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + ok = mips_expand_vec_perm_const_1 (&d); + end_sequence (); + return ok; + } + ok = mips_expand_vec_perm_const_1 (&d); /* If we were given a two-vector permutation which just happened to @@ -21447,8 +21458,8 @@ mips_expand_vec_perm_const (rtx operands[4]) the original permutation. 
*/ if (!ok && which == 3) { - d.op0 = operands[1]; - d.op1 = operands[2]; + d.op0 = op0; + d.op1 = op1; d.one_vector_p = false; memcpy (d.perm, orig_perm, MAX_VECT_LEN); ok = mips_expand_vec_perm_const_1 (&d); @@ -21468,48 +21479,6 @@ mips_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, return 1; } -/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */ - -static bool -mips_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) -{ - struct expand_vec_perm_d d; - unsigned int i, nelt, which; - bool ret; - - d.vmode = vmode; - d.nelt = nelt = GET_MODE_NUNITS (d.vmode); - d.testing_p = true; - - /* Categorize the set of elements in the selector. */ - for (i = which = 0; i < nelt; ++i) - { - unsigned char e = sel[i]; - d.perm[i] = e; - gcc_assert (e < 2 * nelt); - which |= (e < nelt ? 1 : 2); - } - - /* For all elements from second vector, fold the elements to first. */ - if (which == 2) - for (i = 0; i < nelt; ++i) - d.perm[i] -= nelt; - - /* Check whether the mask can be applied to the vector type. */ - d.one_vector_p = (which != 3); - - d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); - d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); - if (!d.one_vector_p) - d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); - - start_sequence (); - ret = mips_expand_vec_perm_const_1 (&d); - end_sequence (); - - return ret; -} - /* Expand an integral vector unpack operation. 
*/ void @@ -22591,8 +22560,8 @@ mips_starting_frame_offset (void) #undef TARGET_PREPARE_PCH_SAVE #define TARGET_PREPARE_PCH_SAVE mips_prepare_pch_save -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK mips_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST mips_vectorize_vec_perm_const #undef TARGET_SCHED_REASSOCIATION_WIDTH #define TARGET_SCHED_REASSOCIATION_WIDTH mips_sched_reassociation_width diff --git a/gcc/config/powerpcspe/altivec.md b/gcc/config/powerpcspe/altivec.md index 81373f581d1..2f85e369c3e 100644 --- a/gcc/config/powerpcspe/altivec.md +++ b/gcc/config/powerpcspe/altivec.md @@ -2080,19 +2080,6 @@ } }) -(define_expand "vec_perm_constv16qi" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "register_operand" "") - (match_operand:V16QI 3 "" "")] - "TARGET_ALTIVEC" -{ - if (altivec_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_insn "*altivec_vpermr_<mode>_internal" [(set (match_operand:VM 0 "register_operand" "=v,?wo") (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") diff --git a/gcc/config/powerpcspe/paired.md b/gcc/config/powerpcspe/paired.md index e12f07fc9b8..e950e465861 100644 --- a/gcc/config/powerpcspe/paired.md +++ b/gcc/config/powerpcspe/paired.md @@ -313,19 +313,6 @@ "ps_merge11 %0, %1, %2" [(set_attr "type" "fp")]) -(define_expand "vec_perm_constv2sf" - [(match_operand:V2SF 0 "gpc_reg_operand" "") - (match_operand:V2SF 1 "gpc_reg_operand" "") - (match_operand:V2SF 2 "gpc_reg_operand" "") - (match_operand:V2SI 3 "" "")] - "TARGET_PAIRED_FLOAT" -{ - if (rs6000_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_insn "paired_sum0" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") (vec_concat:V2SF (plus:SF (vec_select:SF diff --git a/gcc/config/powerpcspe/powerpcspe-protos.h b/gcc/config/powerpcspe/powerpcspe-protos.h index 
78baeecad38..b9baae8a680 100644 --- a/gcc/config/powerpcspe/powerpcspe-protos.h +++ b/gcc/config/powerpcspe/powerpcspe-protos.h @@ -64,9 +64,7 @@ extern void rs6000_expand_vector_extract (rtx, rtx, rtx); extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx); extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode); extern void rs6000_split_v4si_init (rtx []); -extern bool altivec_expand_vec_perm_const (rtx op[4]); extern void altivec_expand_vec_perm_le (rtx op[4]); -extern bool rs6000_expand_vec_perm_const (rtx op[4]); extern void altivec_expand_lvx_be (rtx, rtx, machine_mode, unsigned); extern void altivec_expand_stvx_be (rtx, rtx, machine_mode, unsigned); extern void altivec_expand_stvex_be (rtx, rtx, machine_mode, unsigned); diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c index bf90cc5cd7d..9133125a3ea 100644 --- a/gcc/config/powerpcspe/powerpcspe.c +++ b/gcc/config/powerpcspe/powerpcspe.c @@ -1938,8 +1938,8 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost @@ -38313,6 +38313,9 @@ rs6000_emit_parity (rtx dst, rtx src) } /* Expand an Altivec constant permutation for little endian mode. + OP0 and OP1 are the input vectors and TARGET is the output vector. + SEL specifies the constant permutation vector. + There are two issues: First, the two input operands must be swapped so that together they form a double-wide array in LE order. 
Second, the vperm instruction has surprising behavior @@ -38354,22 +38357,18 @@ rs6000_emit_parity (rtx dst, rtx src) vr9 = 00000006 00000004 00000002 00000000. */ -void -altivec_expand_vec_perm_const_le (rtx operands[4]) +static void +altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { unsigned int i; rtx perm[16]; rtx constv, unspec; - rtx target = operands[0]; - rtx op0 = operands[1]; - rtx op1 = operands[2]; - rtx sel = operands[3]; /* Unpack and adjust the constant selector. */ for (i = 0; i < 16; ++i) { - rtx e = XVECEXP (sel, 0, i); - unsigned int elt = 31 - (INTVAL (e) & 31); + unsigned int elt = 31 - (sel[i] & 31); perm[i] = GEN_INT (elt); } @@ -38451,10 +38450,14 @@ altivec_expand_vec_perm_le (rtx operands[4]) } /* Expand an Altivec constant permutation. Return true if we match - an efficient implementation; false to fall back to VPERM. */ + an efficient implementation; false to fall back to VPERM. -bool -altivec_expand_vec_perm_const (rtx operands[4]) + OP0 and OP1 are the input vectors and TARGET is the output vector. + SEL specifies the constant permutation vector. */ + +static bool +altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { struct altivec_perm_insn { HOST_WIDE_INT mask; @@ -38498,19 +38501,13 @@ altivec_expand_vec_perm_const (rtx operands[4]) unsigned int i, j, elt, which; unsigned char perm[16]; - rtx target, op0, op1, sel, x; + rtx x; bool one_vec; - target = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - sel = operands[3]; - /* Unpack the constant selector. */ for (i = which = 0; i < 16; ++i) { - rtx e = XVECEXP (sel, 0, i); - elt = INTVAL (e) & 31; + elt = sel[i] & 31; which |= (elt < 16 ? 
1 : 2); perm[i] = elt; } @@ -38666,7 +38663,7 @@ altivec_expand_vec_perm_const (rtx operands[4]) if (!BYTES_BIG_ENDIAN) { - altivec_expand_vec_perm_const_le (operands); + altivec_expand_vec_perm_const_le (target, op0, op1, sel); return true; } @@ -38726,60 +38723,54 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1, return true; } -bool -rs6000_expand_vec_perm_const (rtx operands[4]) -{ - rtx target, op0, op1, sel; - unsigned char perm0, perm1; - - target = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - sel = operands[3]; - - /* Unpack the constant selector. */ - perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3; - perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3; - - return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1); -} - -/* Test whether a constant permutation is supported. */ +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ static bool -rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) +rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { + bool testing_p = !target; + /* AltiVec (and thus VSX) can handle arbitrary permutations. */ - if (TARGET_ALTIVEC) + if (TARGET_ALTIVEC && testing_p) return true; - /* Check for ps_merge* or evmerge* insns. */ - if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode) - || (TARGET_SPE && vmode == V2SImode)) + /* Check for ps_merge*, evmerge* or xxperm* insns. 
*/ + if ((vmode == V2SFmode && TARGET_PAIRED_FLOAT) + || (vmode == V2SImode && TARGET_SPE) + || ((vmode == V2DFmode || vmode == V2DImode) + && VECTOR_MEM_VSX_P (vmode))) + { + if (testing_p) + { + op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); + op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); + } + if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1])) + return true; + } + + if (TARGET_ALTIVEC) { - rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); - rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); - return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]); + /* Force the target-independent code to lower to V16QImode. */ + if (vmode != V16QImode) + return false; + if (altivec_expand_vec_perm_const (target, op0, op1, sel)) + return true; } return false; } -/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */ +/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. + OP0 and OP1 are the input vectors and TARGET is the output vector. + PERM specifies the constant permutation vector. 
*/ static void rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1, - machine_mode vmode, unsigned nelt, rtx perm[]) + machine_mode vmode, const vec_perm_builder &perm) { - machine_mode imode; - rtx x; - - imode = vmode; - if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT) - imode = mode_for_int_vector (vmode).require (); - - x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm)); - x = expand_vec_perm (vmode, op0, op1, x, target); + rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target); if (x != target) emit_move_insn (target, x); } @@ -38791,12 +38782,12 @@ rs6000_expand_extract_even (rtx target, rtx op0, rtx op1) { machine_mode vmode = GET_MODE (target); unsigned i, nelt = GET_MODE_NUNITS (vmode); - rtx perm[16]; + vec_perm_builder perm (nelt); for (i = 0; i < nelt; i++) - perm[i] = GEN_INT (i * 2); + perm.quick_push (i * 2); - rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); + rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); } /* Expand a vector interleave operation. */ @@ -38806,16 +38797,16 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) { machine_mode vmode = GET_MODE (target); unsigned i, high, nelt = GET_MODE_NUNITS (vmode); - rtx perm[16]; + vec_perm_builder perm (nelt); high = (highp ? 0 : nelt / 2); for (i = 0; i < nelt / 2; i++) { - perm[i * 2] = GEN_INT (i + high); - perm[i * 2 + 1] = GEN_INT (i + nelt + high); + perm.quick_push (i + high); + perm.quick_push (i + nelt + high); } - rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); + rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); } /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. 
*/ diff --git a/gcc/config/powerpcspe/spe.md b/gcc/config/powerpcspe/spe.md index 2351152dc24..56acfdd86d0 100644 --- a/gcc/config/powerpcspe/spe.md +++ b/gcc/config/powerpcspe/spe.md @@ -511,19 +511,6 @@ [(set_attr "type" "vecsimple") (set_attr "length" "4")]) -(define_expand "vec_perm_constv2si" - [(match_operand:V2SI 0 "gpc_reg_operand" "") - (match_operand:V2SI 1 "gpc_reg_operand" "") - (match_operand:V2SI 2 "gpc_reg_operand" "") - (match_operand:V2SI 3 "" "")] - "TARGET_SPE" -{ - if (rs6000_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_expand "spe_evmergehi" [(match_operand:V2SI 0 "register_operand" "") (match_operand:V2SI 1 "register_operand" "") diff --git a/gcc/config/powerpcspe/vsx.md b/gcc/config/powerpcspe/vsx.md index b669764ce8f..794ff446b8a 100644 --- a/gcc/config/powerpcspe/vsx.md +++ b/gcc/config/powerpcspe/vsx.md @@ -2543,19 +2543,6 @@ } [(set_attr "type" "vecperm")]) -(define_expand "vec_perm_const<mode>" - [(match_operand:VSX_D 0 "vsx_register_operand" "") - (match_operand:VSX_D 1 "vsx_register_operand" "") - (match_operand:VSX_D 2 "vsx_register_operand" "") - (match_operand:V2DI 3 "" "")] - "VECTOR_MEM_VSX_P (<MODE>mode)" -{ - if (rs6000_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - ;; Extraction of a single element in a small integer vector. Until ISA 3.0, ;; none of the small types were allowed in a vector register, so we had to ;; extract to a DImode and either do a direct move or store. 
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 7122f99bffd..5d10c829103 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2198,19 +2198,6 @@ } }) -(define_expand "vec_perm_constv16qi" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "register_operand" "") - (match_operand:V16QI 3 "" "")] - "TARGET_ALTIVEC" -{ - if (altivec_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_insn "*altivec_vpermr_<mode>_internal" [(set (match_operand:VM 0 "register_operand" "=v,?wo") (unspec:VM [(match_operand:VM 1 "register_operand" "v,wo") diff --git a/gcc/config/rs6000/paired.md b/gcc/config/rs6000/paired.md index b0aa329d7b8..584a791d431 100644 --- a/gcc/config/rs6000/paired.md +++ b/gcc/config/rs6000/paired.md @@ -313,19 +313,6 @@ "ps_merge11 %0, %1, %2" [(set_attr "type" "fp")]) -(define_expand "vec_perm_constv2sf" - [(match_operand:V2SF 0 "gpc_reg_operand" "") - (match_operand:V2SF 1 "gpc_reg_operand" "") - (match_operand:V2SF 2 "gpc_reg_operand" "") - (match_operand:V2SI 3 "" "")] - "TARGET_PAIRED_FLOAT" -{ - if (rs6000_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - (define_insn "paired_sum0" [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f") (vec_concat:V2SF (plus:SF (vec_select:SF diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 9264aa2fd26..90107ea8821 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -63,9 +63,7 @@ extern void rs6000_expand_vector_extract (rtx, rtx, rtx); extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx); extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode); extern void rs6000_split_v4si_init (rtx []); -extern bool altivec_expand_vec_perm_const (rtx op[4]); extern void altivec_expand_vec_perm_le (rtx op[4]); -extern bool rs6000_expand_vec_perm_const (rtx op[4]); extern 
void altivec_expand_lvx_be (rtx, rtx, machine_mode, unsigned); extern void altivec_expand_stvx_be (rtx, rtx, machine_mode, unsigned); extern void altivec_expand_stvex_be (rtx, rtx, machine_mode, unsigned); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 42704d34d98..7d10b44fff9 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1910,8 +1910,8 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p -#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK -#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost @@ -35570,6 +35570,9 @@ rs6000_emit_parity (rtx dst, rtx src) } /* Expand an Altivec constant permutation for little endian mode. + OP0 and OP1 are the input vectors and TARGET is the output vector. + SEL specifies the constant permutation vector. + There are two issues: First, the two input operands must be swapped so that together they form a double-wide array in LE order. Second, the vperm instruction has surprising behavior @@ -35611,22 +35614,18 @@ rs6000_emit_parity (rtx dst, rtx src) vr9 = 00000006 00000004 00000002 00000000. */ -void -altivec_expand_vec_perm_const_le (rtx operands[4]) +static void +altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { unsigned int i; rtx perm[16]; rtx constv, unspec; - rtx target = operands[0]; - rtx op0 = operands[1]; - rtx op1 = operands[2]; - rtx sel = operands[3]; /* Unpack and adjust the constant selector. 
*/ for (i = 0; i < 16; ++i) { - rtx e = XVECEXP (sel, 0, i); - unsigned int elt = 31 - (INTVAL (e) & 31); + unsigned int elt = 31 - (sel[i] & 31); perm[i] = GEN_INT (elt); } @@ -35708,10 +35707,14 @@ altivec_expand_vec_perm_le (rtx operands[4]) } /* Expand an Altivec constant permutation. Return true if we match - an efficient implementation; false to fall back to VPERM. */ + an efficient implementation; false to fall back to VPERM. -bool -altivec_expand_vec_perm_const (rtx operands[4]) + OP0 and OP1 are the input vectors and TARGET is the output vector. + SEL specifies the constant permutation vector. */ + +static bool +altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, + const vec_perm_indices &sel) { struct altivec_perm_insn { HOST_WIDE_INT mask; @@ -35759,19 +35762,13 @@ altivec_expand_vec_perm_const (rtx operands[4]) unsigned int i, j, elt, which; unsigned char perm[16]; - rtx target, op0, op1, sel, x; + rtx x; bool one_vec; - target = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - sel = operands[3]; - /* Unpack the constant selector. */ for (i = which = 0; i < 16; ++i) { - rtx e = XVECEXP (sel, 0, i); - elt = INTVAL (e) & 31; + elt = sel[i] & 31; which |= (elt < 16 ? 1 : 2); perm[i] = elt; } @@ -35927,7 +35924,7 @@ altivec_expand_vec_perm_const (rtx operands[4]) if (!BYTES_BIG_ENDIAN) { - altivec_expand_vec_perm_const_le (operands); + altivec_expand_vec_perm_const_le (target, op0, op1, sel); return true; } @@ -35987,59 +35984,53 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1, return true; } -bool -rs6000_expand_vec_perm_const (rtx operands[4]) -{ - rtx target, op0, op1, sel; - unsigned char perm0, perm1; - - target = operands[0]; - op0 = operands[1]; - op1 = operands[2]; - sel = operands[3]; - - /* Unpack the constant selector. 
*/ - perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3; - perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3; - - return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1); -} - -/* Test whether a constant permutation is supported. */ +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ static bool -rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel) +rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) { + bool testing_p = !target; + /* AltiVec (and thus VSX) can handle arbitrary permutations. */ - if (TARGET_ALTIVEC) + if (TARGET_ALTIVEC && testing_p) return true; - /* Check for ps_merge* or evmerge* insns. */ - if (TARGET_PAIRED_FLOAT && vmode == V2SFmode) + /* Check for ps_merge* or xxpermdi insns. */ + if ((vmode == V2SFmode && TARGET_PAIRED_FLOAT) + || ((vmode == V2DFmode || vmode == V2DImode) + && VECTOR_MEM_VSX_P (vmode))) + { + if (testing_p) + { + op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); + op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); + } + if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1])) + return true; + } + + if (TARGET_ALTIVEC) { - rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); - rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); - return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]); + /* Force the target-independent code to lower to V16QImode. */ + if (vmode != V16QImode) + return false; + if (altivec_expand_vec_perm_const (target, op0, op1, sel)) + return true; } return false; } -/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */ +/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. + OP0 and OP1 are the input vectors and TARGET is the output vector. + PERM specifies the constant permutation vector. 
*/ static void rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1, - machine_mode vmode, unsigned nelt, rtx perm[]) + machine_mode vmode, const vec_perm_builder &perm) { - machine_mode imode; - rtx x; - - imode = vmode; - if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT) - imode = mode_for_int_vector (vmode).require (); - - x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm)); - x = expand_vec_perm (vmode, op0, op1, x, target); + rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target); if (x != target) emit_move_insn (target, x); } @@ -36051,12 +36042,12 @@ rs6000_expand_extract_even (rtx target, rtx op0, rtx op1) { machine_mode vmode = GET_MODE (target); unsigned i, nelt = GET_MODE_NUNITS (vmode); - rtx perm[16]; + vec_perm_builder perm (nelt); for (i = 0; i < nelt; i++) - perm[i] = GEN_INT (i * 2); + perm.quick_push (i * 2); - rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); + rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); } /* Expand a vector interleave operation. */ @@ -36066,16 +36057,16 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) { machine_mode vmode = GET_MODE (target); unsigned i, high, nelt = GET_MODE_NUNITS (vmode); - rtx perm[16]; + vec_perm_builder perm (nelt); high = (highp ? 0 : nelt / 2); for (i = 0; i < nelt / 2; i++) { - perm[i * 2] = GEN_INT (i + high); - perm[i * 2 + 1] = GEN_INT (i + nelt + high); + perm.quick_push (i + high); + perm.quick_push (i + nelt + high); } - rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm); + rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); } /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. 
*/ diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index f6f2bd48363..7c6bb17a33b 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3189,19 +3189,6 @@ } [(set_attr "type" "vecperm")]) -(define_expand "vec_perm_const<mode>" - [(match_operand:VSX_D 0 "vsx_register_operand" "") - (match_operand:VSX_D 1 "vsx_register_operand" "") - (match_operand:VSX_D 2 "vsx_register_operand" "") - (match_operand:V2DI 3 "" "")] - "VECTOR_MEM_VSX_P (<MODE>mode)" -{ - if (rs6000_expand_vec_perm_const (operands)) - DONE; - else - FAIL; -}) - ;; Extraction of a single element in a small integer vector. Until ISA 3.0, ;; none of the small types were allowed in a vector register, so we had to ;; extract to a DImode and either do a direct move or store. diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 288264f0e8c..62bc492229e 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -688,6 +688,8 @@ static bool sparc_modes_tieable_p (machine_mode, machine_mode); static bool sparc_can_change_mode_class (machine_mode, machine_mode, reg_class_t); static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT); +static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, + const vec_perm_indices &); #ifdef SUBTARGET_ATTRIBUTE_TABLE /* Table of valid machine attributes. */ @@ -932,6 +934,9 @@ char sparc_hard_reg_printed[8]; #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment +#undef TARGET_VECTORIZE_VEC_PERM_CONST +#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const + struct gcc_target targetm = TARGET_INITIALIZER; /* Return the memory reference contained in X if any, zero otherwise. */ @@ -12813,6 +12818,39 @@ sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel) emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1)); } +/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. 
A null TARGET is a query + only and emits nothing; otherwise return true once the permutation SEL + of OP0 and OP1 has been emitted into TARGET. */ + +static bool +sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) +{ + /* The vec_perm_constv8qi expander that this hook replaces was + conditional on TARGET_VIS2; keep that restriction. */ + if (!TARGET_VIS2) + return false; + + /* All permutes are supported. */ + if (!target) + return true; + + /* Force target-independent code to convert constant permutations on other + modes down to V8QI. Rely on this to avoid the complexity of the byte + order of the permutation. */ + if (vmode != V8QImode) + return false; + + unsigned int i, mask; + for (i = mask = 0; i < 8; ++i) + mask |= (sel[i] & 0xf) << (28 - i*4); + rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode)); + + emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx)); + emit_insn (gen_bshufflev8qi_vis (target, op0, op1)); + return true; +} + /* Implement TARGET_FRAME_POINTER_REQUIRED. */ static bool diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index d1af68034dd..f7dff435642 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -9327,28 +9327,6 @@ visl") (set_attr "subtype" "other") (set_attr "fptype" "double")]) -;; The rtl expanders will happily convert constant permutations on other -;; modes down to V8QI. Rely on this to avoid the complexity of the byte -;; order of the permutation. 
-(define_expand "vec_perm_constv8qi" - [(match_operand:V8QI 0 "register_operand" "") - (match_operand:V8QI 1 "register_operand" "") - (match_operand:V8QI 2 "register_operand" "") - (match_operand:V8QI 3 "" "")] - "TARGET_VIS2" -{ - unsigned int i, mask; - rtx sel = operands[3]; - - for (i = mask = 0; i < 8; ++i) - mask |= (INTVAL (XVECEXP (sel, 0, i)) & 0xf) << (28 - i*4); - sel = force_reg (SImode, gen_int_mode (mask, SImode)); - - emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx)); - emit_insn (gen_bshufflev8qi_vis (operands[0], operands[1], operands[2])); - DONE; -}) - ;; Unlike constant permutation, we can vastly simplify the compression of ;; the 64-bit selector input to the 32-bit %gsr value by knowing what the ;; width of the input is. |