diff options
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 14 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 10 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 1 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/pr100048.c | 25 |
4 files changed, 45 insertions, 5 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 7db2938bb84..b8b6f55e160 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -8657,6 +8657,20 @@ "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" ) +;; Special purpose permute used by the predicate generation instructions. +;; Unlike the normal permute patterns, these instructions operate on VNx16BI +;; regardless of the element size, so that all input and output bits are +;; well-defined. Operand 3 then indicates the size of the permute. +(define_insn "@aarch64_sve_trn1_conv<mode>" + [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") + (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa") + (match_operand:VNx16BI 2 "register_operand" "Upa") + (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")] + UNSPEC_TRN1_CONV))] + "TARGET_SVE" + "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>" +) + ;; ========================================================================= ;; == Conversions ;; ========================================================================= diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 04b55d9070b..09d79f67a61 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -5535,12 +5535,12 @@ aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder, } } - /* Emit the TRN1 itself. */ + /* Emit the TRN1 itself. We emit a TRN that operates on VNx16BI + operands but permutes them as though they had mode MODE. */ machine_mode mode = aarch64_sve_pred_mode (permute_size).require (); - target = aarch64_target_reg (target, mode); - emit_insn (gen_aarch64_sve (UNSPEC_TRN1, mode, target, - gen_lowpart (mode, a), - gen_lowpart (mode, b))); + target = aarch64_target_reg (target, GET_MODE (a)); + rtx type_reg = CONST0_RTX (mode); + emit_insn (gen_aarch64_sve_trn1_conv (mode, target, a, b, type_reg)); return target; } diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 5f5abd60525..cac33ae812b 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -649,6 +649,7 @@ UNSPEC_UZP2Q ; Used in aarch64-sve.md. UNSPEC_ZIP1Q ; Used in aarch64-sve.md. UNSPEC_ZIP2Q ; Used in aarch64-sve.md. + UNSPEC_TRN1_CONV ; Used in aarch64-sve.md. UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md. UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md. UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c b/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c new file mode 100644 index 00000000000..525933863f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr100048.c @@ -0,0 +1,25 @@ +/* { dg-additional-options "-O2 -fno-schedule-insns" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "arm_sve.h" + +/* +** foo: +** ptrue (p[0-7])\.d, all +** pfalse (p[0-7])\.b +** ptrue (p[0-7])\.s, all +** trn1 (p[0-7])\.d, \2\.d, \3\.d +** trn1 \2\.d, \1\.d, \3\.d +** faddv (h[0-31]), \4\, (z[0-31]).h +** faddv (h[0-31]), \2\, \6\.h +** str \5, [x0] +** str \7, [x0, 2] +** ret +*/ +void foo(svfloat16_t in, float16_t *dst) { + const svbool_t pg_q0 = svdupq_n_b16(1, 0, 1, 0, 0, 0, 0, 0); + const svbool_t pg_f0 = svdupq_n_b16(1, 0, 0, 0, 0, 0, 0, 0); + dst[0] = svaddv_f16(pg_f0, in); + dst[1] = svaddv_f16(pg_q0, in); +} + |