diff options
author | Matthew Gretton-Dann <matthew.gretton-dann@linaro.org> | 2013-05-02 13:44:34 +0000 |
---|---|---|
committer | Matthew Gretton-Dann <matthew.gretton-dann@linaro.org> | 2013-05-02 13:44:34 +0000 |
commit | b234a322d3353163978ff3e05d06ac163296f357 (patch) | |
tree | f92803deb3100e283c4a016266dc2cd3663407af /gcc/config | |
parent | 12c433c86e6ddccfc9521d19096e403be0eb3905 (diff) |
Backport from trunk r198090,198136-198137,198142,198176.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@198537 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/aarch64/aarch64-builtins.c | 326 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 397 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 105 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 27 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 282 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 4 |
6 files changed, 557 insertions, 584 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 1ea55a83eb8..35475ba12bb 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -50,6 +50,7 @@ enum aarch64_simd_builtin_type_mode T_OI, T_XI, T_SI, + T_SF, T_HI, T_QI, T_MAX @@ -72,6 +73,7 @@ enum aarch64_simd_builtin_type_mode #define oi_UP T_OI #define xi_UP T_XI #define si_UP T_SI +#define sf_UP T_SF #define hi_UP T_HI #define qi_UP T_QI @@ -128,123 +130,133 @@ typedef struct unsigned int fcode; } aarch64_simd_builtin_datum; -#define CF(N, X) CODE_FOR_aarch64_##N##X - -#define VAR1(T, N, A) \ - {#N, AARCH64_SIMD_##T, UP (A), CF (N, A), 0}, -#define VAR2(T, N, A, B) \ - VAR1 (T, N, A) \ - VAR1 (T, N, B) -#define VAR3(T, N, A, B, C) \ - VAR2 (T, N, A, B) \ - VAR1 (T, N, C) -#define VAR4(T, N, A, B, C, D) \ - VAR3 (T, N, A, B, C) \ - VAR1 (T, N, D) -#define VAR5(T, N, A, B, C, D, E) \ - VAR4 (T, N, A, B, C, D) \ - VAR1 (T, N, E) -#define VAR6(T, N, A, B, C, D, E, F) \ - VAR5 (T, N, A, B, C, D, E) \ - VAR1 (T, N, F) -#define VAR7(T, N, A, B, C, D, E, F, G) \ - VAR6 (T, N, A, B, C, D, E, F) \ - VAR1 (T, N, G) -#define VAR8(T, N, A, B, C, D, E, F, G, H) \ - VAR7 (T, N, A, B, C, D, E, F, G) \ - VAR1 (T, N, H) -#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \ - VAR8 (T, N, A, B, C, D, E, F, G, H) \ - VAR1 (T, N, I) -#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \ - VAR9 (T, N, A, B, C, D, E, F, G, H, I) \ - VAR1 (T, N, J) -#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \ - VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \ - VAR1 (T, N, K) -#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \ - VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \ - VAR1 (T, N, L) +#define CF0(N, X) CODE_FOR_aarch64_##N##X +#define CF1(N, X) CODE_FOR_##N##X##1 +#define CF2(N, X) CODE_FOR_##N##X##2 +#define CF3(N, X) CODE_FOR_##N##X##3 +#define CF4(N, X) CODE_FOR_##N##X##4 +#define CF10(N, X) CODE_FOR_##N##X + +#define VAR1(T, N, MAP, A) \ + {#N, AARCH64_SIMD_##T, UP (A), CF##MAP (N, A), 0}, +#define VAR2(T, N, MAP, A, B) \ + VAR1 (T, N, MAP, A) \ + VAR1 (T, N, MAP, B) +#define VAR3(T, N, MAP, A, B, C) \ + VAR2 (T, N, MAP, A, B) \ + VAR1 (T, N, MAP, C) +#define VAR4(T, N, MAP, A, B, C, D) \ + VAR3 (T, N, MAP, A, B, C) \ + VAR1 (T, N, MAP, D) +#define VAR5(T, N, MAP, A, B, C, D, E) \ + VAR4 (T, N, MAP, A, B, C, D) \ + VAR1 (T, N, MAP, E) +#define VAR6(T, N, MAP, A, B, C, D, E, F) \ + VAR5 (T, N, MAP, A, B, C, D, E) \ + VAR1 (T, N, MAP, F) +#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \ + VAR6 (T, N, MAP, A, B, C, D, E, F) \ + VAR1 (T, N, MAP, G) +#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \ + VAR7 (T, N, MAP, A, B, C, D, E, F, G) \ + VAR1 (T, N, MAP, H) +#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \ + VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \ + VAR1 (T, N, MAP, I) +#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ + VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \ + VAR1 (T, N, MAP, J) +#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ + VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \ + VAR1 (T, N, MAP, K) +#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \ + VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \ + VAR1 (T, N, MAP, L) /* BUILTIN_<ITERATOR> macros should expand to cover the same range of modes as is given for each define_mode_iterator in config/aarch64/iterators.md. */ -#define BUILTIN_DX(T, N) \ - VAR2 (T, N, di, df) -#define BUILTIN_SDQ_I(T, N) \ - VAR4 (T, N, qi, hi, si, di) -#define BUILTIN_SD_HSI(T, N) \ - VAR2 (T, N, hi, si) -#define BUILTIN_V2F(T, N) \ - VAR2 (T, N, v2sf, v2df) -#define BUILTIN_VALL(T, N) \ - VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, v2sf, v4sf, v2df) -#define BUILTIN_VB(T, N) \ - VAR2 (T, N, v8qi, v16qi) -#define BUILTIN_VD(T, N) \ - VAR4 (T, N, v8qi, v4hi, v2si, v2sf) -#define BUILTIN_VDC(T, N) \ - VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df) -#define BUILTIN_VDIC(T, N) \ - VAR3 (T, N, v8qi, v4hi, v2si) -#define BUILTIN_VDN(T, N) \ - VAR3 (T, N, v4hi, v2si, di) -#define BUILTIN_VDQ(T, N) \ - VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) -#define BUILTIN_VDQF(T, N) \ - VAR3 (T, N, v2sf, v4sf, v2df) -#define BUILTIN_VDQHS(T, N) \ - VAR4 (T, N, v4hi, v8hi, v2si, v4si) -#define BUILTIN_VDQIF(T, N) \ - VAR9 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df) -#define BUILTIN_VDQM(T, N) \ - VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si) -#define BUILTIN_VDQV(T, N) \ - VAR5 (T, N, v8qi, v16qi, v4hi, v8hi, v4si) -#define BUILTIN_VDQ_BHSI(T, N) \ - VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si) -#define BUILTIN_VDQ_I(T, N) \ - VAR7 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) -#define BUILTIN_VDW(T, N) \ - VAR3 (T, N, v8qi, v4hi, v2si) -#define BUILTIN_VD_BHSI(T, N) \ - VAR3 (T, N, v8qi, v4hi, v2si) -#define BUILTIN_VD_HSI(T, N) \ - VAR2 (T, N, v4hi, v2si) -#define BUILTIN_VD_RE(T, N) \ - VAR6 (T, N, v8qi, v4hi, v2si, v2sf, di, df) -#define BUILTIN_VQ(T, N) \ - VAR6 (T, N, v16qi, v8hi, v4si, v2di, v4sf, v2df) -#define BUILTIN_VQN(T, N) \ - VAR3 (T, N, v8hi, v4si, v2di) -#define BUILTIN_VQW(T, N) \ - VAR3 (T, N, v16qi, v8hi, v4si) -#define BUILTIN_VQ_HSI(T, N) \ - VAR2 (T, N, v8hi, v4si) -#define BUILTIN_VQ_S(T, N) \ - VAR6 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si) -#define BUILTIN_VSDQ_HSI(T, N) \ - VAR6 (T, N, v4hi, v8hi, v2si, v4si, hi, si) -#define BUILTIN_VSDQ_I(T, N) \ - VAR11 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di) -#define BUILTIN_VSDQ_I_BHSI(T, N) \ - VAR10 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si) -#define BUILTIN_VSDQ_I_DI(T, N) \ - VAR8 (T, N, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di) -#define BUILTIN_VSD_HSI(T, N) \ - VAR4 (T, N, v4hi, v2si, hi, si) -#define BUILTIN_VSQN_HSDI(T, N) \ - VAR6 (T, N, v8hi, v4si, v2di, hi, si, di) -#define BUILTIN_VSTRUCT(T, N) \ - VAR3 (T, N, oi, ci, xi) +#define BUILTIN_DX(T, N, MAP) \ + VAR2 (T, N, MAP, di, df) +#define BUILTIN_GPF(T, N, MAP) \ + VAR2 (T, N, MAP, sf, df) +#define BUILTIN_SDQ_I(T, N, MAP) \ + VAR4 (T, N, MAP, qi, hi, si, di) +#define BUILTIN_SD_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, hi, si) +#define BUILTIN_V2F(T, N, MAP) \ + VAR2 (T, N, MAP, v2sf, v2df) +#define BUILTIN_VALL(T, N, MAP) \ + VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \ + v4si, v2di, v2sf, v4sf, v2df) +#define BUILTIN_VB(T, N, MAP) \ + VAR2 (T, N, MAP, v8qi, v16qi) +#define BUILTIN_VD(T, N, MAP) \ + VAR4 (T, N, MAP, v8qi, v4hi, v2si, v2sf) +#define BUILTIN_VDC(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) +#define BUILTIN_VDIC(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VDN(T, N, MAP) \ + VAR3 (T, N, MAP, v4hi, v2si, di) +#define BUILTIN_VDQ(T, N, MAP) \ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) +#define BUILTIN_VDQF(T, N, MAP) \ + VAR3 (T, N, MAP, v2sf, v4sf, v2df) +#define BUILTIN_VDQH(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v8hi) +#define BUILTIN_VDQHS(T, N, MAP) \ + VAR4 (T, N, MAP, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQIF(T, N, MAP) \ + VAR9 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2sf, v4sf, v2df) +#define BUILTIN_VDQM(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQV(T, N, MAP) \ + VAR5 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v4si) +#define BUILTIN_VDQ_BHSI(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VDQ_I(T, N, MAP) \ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) +#define BUILTIN_VDW(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VD_BHSI(T, N, MAP) \ + VAR3 (T, N, MAP, v8qi, v4hi, v2si) +#define BUILTIN_VD_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v2si) +#define BUILTIN_VD_RE(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v4hi, v2si, v2sf, di, df) +#define BUILTIN_VQ(T, N, MAP) \ + VAR6 (T, N, MAP, v16qi, v8hi, v4si, v2di, v4sf, v2df) +#define BUILTIN_VQN(T, N, MAP) \ + VAR3 (T, N, MAP, v8hi, v4si, v2di) +#define BUILTIN_VQW(T, N, MAP) \ + VAR3 (T, N, MAP, v16qi, v8hi, v4si) +#define BUILTIN_VQ_HSI(T, N, MAP) \ + VAR2 (T, N, MAP, v8hi, v4si) +#define BUILTIN_VQ_S(T, N, MAP) \ + VAR6 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si) +#define BUILTIN_VSDQ_HSI(T, N, MAP) \ + VAR6 (T, N, MAP, v4hi, v8hi, v2si, v4si, hi, si) +#define BUILTIN_VSDQ_I(T, N, MAP) \ + VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si, di) +#define BUILTIN_VSDQ_I_BHSI(T, N, MAP) \ + VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, qi, hi, si) +#define BUILTIN_VSDQ_I_DI(T, N, MAP) \ + VAR8 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di, di) +#define BUILTIN_VSD_HSI(T, N, MAP) \ + VAR4 (T, N, MAP, v4hi, v2si, hi, si) +#define BUILTIN_VSQN_HSDI(T, N, MAP) \ + VAR6 (T, N, MAP, v8hi, v4si, v2di, hi, si, di) +#define BUILTIN_VSTRUCT(T, N, MAP) \ + VAR3 (T, N, MAP, oi, ci, xi) static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { #include "aarch64-simd-builtins.def" }; #undef VAR1 -#define VAR1(T, N, A) \ +#define VAR1(T, N, MAP, A) \ AARCH64_SIMD_BUILTIN_##N##A, enum aarch64_builtins @@ -257,53 +269,6 @@ enum aarch64_builtins AARCH64_BUILTIN_MAX }; -#undef BUILTIN_DX -#undef BUILTIN_SDQ_I -#undef BUILTIN_SD_HSI -#undef BUILTIN_V2F -#undef BUILTIN_VALL -#undef BUILTIN_VB -#undef BUILTIN_VD -#undef BUILTIN_VDC -#undef BUILTIN_VDIC -#undef BUILTIN_VDN -#undef BUILTIN_VDQ -#undef BUILTIN_VDQF -#undef BUILTIN_VDQHS -#undef BUILTIN_VDQIF -#undef BUILTIN_VDQM -#undef BUILTIN_VDQV -#undef BUILTIN_VDQ_BHSI -#undef BUILTIN_VDQ_I -#undef BUILTIN_VDW -#undef BUILTIN_VD_BHSI -#undef BUILTIN_VD_HSI -#undef BUILTIN_VD_RE -#undef BUILTIN_VQ -#undef BUILTIN_VQN -#undef BUILTIN_VQW -#undef BUILTIN_VQ_HSI -#undef BUILTIN_VQ_S -#undef BUILTIN_VSDQ_HSI -#undef BUILTIN_VSDQ_I -#undef BUILTIN_VSDQ_I_BHSI -#undef BUILTIN_VSDQ_I_DI -#undef BUILTIN_VSD_HSI -#undef BUILTIN_VSQN_HSDI -#undef BUILTIN_VSTRUCT -#undef CF -#undef VAR1 -#undef VAR2 -#undef VAR3 -#undef VAR4 -#undef VAR5 -#undef VAR6 -#undef VAR7 -#undef VAR8 -#undef VAR9 -#undef VAR10 -#undef VAR11 - static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; #define NUM_DREG_TYPES 6 @@ -609,7 +574,7 @@ aarch64_init_simd_builtins (void) { "v8qi", "v4hi", "v2si", "v2sf", "di", "df", "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", - "ti", "ei", "oi", "xi", "si", "hi", "qi" + "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi" }; char namebuf[60]; tree ftype = NULL; @@ -1291,3 +1256,56 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) } #undef AARCH64_CHECK_BUILTIN_MODE #undef AARCH64_FIND_FRINT_VARIANT +#undef BUILTIN_DX +#undef BUILTIN_SDQ_I +#undef BUILTIN_SD_HSI +#undef BUILTIN_V2F +#undef BUILTIN_VALL +#undef BUILTIN_VB +#undef BUILTIN_VD +#undef BUILTIN_VDC +#undef BUILTIN_VDIC +#undef BUILTIN_VDN +#undef BUILTIN_VDQ +#undef BUILTIN_VDQF +#undef BUILTIN_VDQH +#undef BUILTIN_VDQHS +#undef BUILTIN_VDQIF +#undef BUILTIN_VDQM +#undef BUILTIN_VDQV +#undef BUILTIN_VDQ_BHSI +#undef BUILTIN_VDQ_I +#undef BUILTIN_VDW +#undef BUILTIN_VD_BHSI +#undef BUILTIN_VD_HSI +#undef BUILTIN_VD_RE +#undef BUILTIN_VQ +#undef BUILTIN_VQN +#undef BUILTIN_VQW +#undef BUILTIN_VQ_HSI +#undef BUILTIN_VQ_S +#undef BUILTIN_VSDQ_HSI +#undef BUILTIN_VSDQ_I +#undef BUILTIN_VSDQ_I_BHSI +#undef BUILTIN_VSDQ_I_DI +#undef BUILTIN_VSD_HSI +#undef BUILTIN_VSQN_HSDI +#undef BUILTIN_VSTRUCT +#undef CF0 +#undef CF1 +#undef CF2 +#undef CF3 +#undef CF4 +#undef CF10 +#undef VAR1 +#undef VAR2 +#undef VAR3 +#undef VAR4 +#undef VAR5 +#undef VAR6 +#undef VAR7 +#undef VAR8 +#undef VAR9 +#undef VAR10 +#undef VAR11 + diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index a6a5e12c7a5..43b5b931d77 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -18,241 +18,264 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* In the list below, the BUILTIN_<ITERATOR> macros should - correspond to the iterator used to construct the instruction's - patterns in aarch64-simd.md. A helpful idiom to follow when - adding new builtins is to add a line for each pattern in the md - file. Thus, ADDP, which has one pattern defined for the VD_BHSI - iterator, and one for DImode, has two entries below. */ +/* In the list below, the BUILTIN_<ITERATOR> macros expand to create + builtins for each of the modes described by <ITERATOR>. When adding + new builtins to this list, a helpful idiom to follow is to add + a line for each pattern in the md file. Thus, ADDP, which has one + pattern defined for the VD_BHSI iterator, and one for DImode, has two + entries below. - BUILTIN_VD_RE (CREATE, create) - BUILTIN_VQ_S (GETLANE, get_lane_signed) - BUILTIN_VDQ (GETLANE, get_lane_unsigned) - BUILTIN_VDQF (GETLANE, get_lane) - VAR1 (GETLANE, get_lane, di) - BUILTIN_VDC (COMBINE, combine) - BUILTIN_VB (BINOP, pmul) - BUILTIN_VDQF (UNOP, sqrt) - BUILTIN_VD_BHSI (BINOP, addp) - VAR1 (UNOP, addp, di) + Parameter 1 is the 'type' of the intrinsic. This is used to + describe the type modifiers (for example; unsigned) applied to + each of the parameters to the intrinsic function. - BUILTIN_VD_RE (REINTERP, reinterpretdi) - BUILTIN_VDC (REINTERP, reinterpretv8qi) - BUILTIN_VDC (REINTERP, reinterpretv4hi) - BUILTIN_VDC (REINTERP, reinterpretv2si) - BUILTIN_VDC (REINTERP, reinterpretv2sf) - BUILTIN_VQ (REINTERP, reinterpretv16qi) - BUILTIN_VQ (REINTERP, reinterpretv8hi) - BUILTIN_VQ (REINTERP, reinterpretv4si) - BUILTIN_VQ (REINTERP, reinterpretv4sf) - BUILTIN_VQ (REINTERP, reinterpretv2di) - BUILTIN_VQ (REINTERP, reinterpretv2df) + Parameter 2 is the name of the intrinsic. This is appended + to `__builtin_aarch64_<name><mode>` to give the intrinsic name + as exported to the front-ends. - BUILTIN_VDQ_I (BINOP, dup_lane) - BUILTIN_SDQ_I (BINOP, dup_lane) + Parameter 3 describes how to map from the name to the CODE_FOR_ + macro holding the RTL pattern for the intrinsic. This mapping is: + 0 - CODE_FOR_aarch64_<name><mode> + 1-9 - CODE_FOR_<name><mode><1-9> + 10 - CODE_FOR_<name><mode>. */ + + BUILTIN_VD_RE (CREATE, create, 0) + BUILTIN_VQ_S (GETLANE, get_lane_signed, 0) + BUILTIN_VDQ (GETLANE, get_lane_unsigned, 0) + BUILTIN_VDQF (GETLANE, get_lane, 0) + VAR1 (GETLANE, get_lane, 0, di) + BUILTIN_VDC (COMBINE, combine, 0) + BUILTIN_VB (BINOP, pmul, 0) + BUILTIN_VDQF (UNOP, sqrt, 2) + BUILTIN_VD_BHSI (BINOP, addp, 0) + VAR1 (UNOP, addp, 0, di) + + BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) + BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) + BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) + BUILTIN_VDC (REINTERP, reinterpretv2si, 0) + BUILTIN_VDC (REINTERP, reinterpretv2sf, 0) + BUILTIN_VQ (REINTERP, reinterpretv16qi, 0) + BUILTIN_VQ (REINTERP, reinterpretv8hi, 0) + BUILTIN_VQ (REINTERP, reinterpretv4si, 0) + BUILTIN_VQ (REINTERP, reinterpretv4sf, 0) + BUILTIN_VQ (REINTERP, reinterpretv2di, 0) + BUILTIN_VQ (REINTERP, reinterpretv2df, 0) + + BUILTIN_VDQ_I (BINOP, dup_lane, 0) + BUILTIN_SDQ_I (BINOP, dup_lane, 0) /* Implemented by aarch64_<sur>q<r>shl<mode>. */ - BUILTIN_VSDQ_I (BINOP, sqshl) - BUILTIN_VSDQ_I (BINOP, uqshl) - BUILTIN_VSDQ_I (BINOP, sqrshl) - BUILTIN_VSDQ_I (BINOP, uqrshl) + BUILTIN_VSDQ_I (BINOP, sqshl, 0) + BUILTIN_VSDQ_I (BINOP, uqshl, 0) + BUILTIN_VSDQ_I (BINOP, sqrshl, 0) + BUILTIN_VSDQ_I (BINOP, uqrshl, 0) /* Implemented by aarch64_<su_optab><optab><mode>. */ - BUILTIN_VSDQ_I (BINOP, sqadd) - BUILTIN_VSDQ_I (BINOP, uqadd) - BUILTIN_VSDQ_I (BINOP, sqsub) - BUILTIN_VSDQ_I (BINOP, uqsub) + BUILTIN_VSDQ_I (BINOP, sqadd, 0) + BUILTIN_VSDQ_I (BINOP, uqadd, 0) + BUILTIN_VSDQ_I (BINOP, sqsub, 0) + BUILTIN_VSDQ_I (BINOP, uqsub, 0) /* Implemented by aarch64_<sur>qadd<mode>. */ - BUILTIN_VSDQ_I (BINOP, suqadd) - BUILTIN_VSDQ_I (BINOP, usqadd) + BUILTIN_VSDQ_I (BINOP, suqadd, 0) + BUILTIN_VSDQ_I (BINOP, usqadd, 0) /* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */ - BUILTIN_VDC (GETLANE, get_dregoi) - BUILTIN_VDC (GETLANE, get_dregci) - BUILTIN_VDC (GETLANE, get_dregxi) + BUILTIN_VDC (GETLANE, get_dregoi, 0) + BUILTIN_VDC (GETLANE, get_dregci, 0) + BUILTIN_VDC (GETLANE, get_dregxi, 0) /* Implemented by aarch64_get_qreg<VSTRUCT:mode><VQ:mode>. */ - BUILTIN_VQ (GETLANE, get_qregoi) - BUILTIN_VQ (GETLANE, get_qregci) - BUILTIN_VQ (GETLANE, get_qregxi) + BUILTIN_VQ (GETLANE, get_qregoi, 0) + BUILTIN_VQ (GETLANE, get_qregci, 0) + BUILTIN_VQ (GETLANE, get_qregxi, 0) /* Implemented by aarch64_set_qreg<VSTRUCT:mode><VQ:mode>. */ - BUILTIN_VQ (SETLANE, set_qregoi) - BUILTIN_VQ (SETLANE, set_qregci) - BUILTIN_VQ (SETLANE, set_qregxi) + BUILTIN_VQ (SETLANE, set_qregoi, 0) + BUILTIN_VQ (SETLANE, set_qregci, 0) + BUILTIN_VQ (SETLANE, set_qregxi, 0) /* Implemented by aarch64_ld<VSTRUCT:nregs><VDC:mode>. */ - BUILTIN_VDC (LOADSTRUCT, ld2) - BUILTIN_VDC (LOADSTRUCT, ld3) - BUILTIN_VDC (LOADSTRUCT, ld4) + BUILTIN_VDC (LOADSTRUCT, ld2, 0) + BUILTIN_VDC (LOADSTRUCT, ld3, 0) + BUILTIN_VDC (LOADSTRUCT, ld4, 0) /* Implemented by aarch64_ld<VSTRUCT:nregs><VQ:mode>. */ - BUILTIN_VQ (LOADSTRUCT, ld2) - BUILTIN_VQ (LOADSTRUCT, ld3) - BUILTIN_VQ (LOADSTRUCT, ld4) + BUILTIN_VQ (LOADSTRUCT, ld2, 0) + BUILTIN_VQ (LOADSTRUCT, ld3, 0) + BUILTIN_VQ (LOADSTRUCT, ld4, 0) /* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */ - BUILTIN_VDC (STORESTRUCT, st2) - BUILTIN_VDC (STORESTRUCT, st3) - BUILTIN_VDC (STORESTRUCT, st4) + BUILTIN_VDC (STORESTRUCT, st2, 0) + BUILTIN_VDC (STORESTRUCT, st3, 0) + BUILTIN_VDC (STORESTRUCT, st4, 0) /* Implemented by aarch64_st<VSTRUCT:nregs><VQ:mode>. */ - BUILTIN_VQ (STORESTRUCT, st2) - BUILTIN_VQ (STORESTRUCT, st3) - BUILTIN_VQ (STORESTRUCT, st4) + BUILTIN_VQ (STORESTRUCT, st2, 0) + BUILTIN_VQ (STORESTRUCT, st3, 0) + BUILTIN_VQ (STORESTRUCT, st4, 0) - BUILTIN_VQW (BINOP, saddl2) - BUILTIN_VQW (BINOP, uaddl2) - BUILTIN_VQW (BINOP, ssubl2) - BUILTIN_VQW (BINOP, usubl2) - BUILTIN_VQW (BINOP, saddw2) - BUILTIN_VQW (BINOP, uaddw2) - BUILTIN_VQW (BINOP, ssubw2) - BUILTIN_VQW (BINOP, usubw2) + BUILTIN_VQW (BINOP, saddl2, 0) + BUILTIN_VQW (BINOP, uaddl2, 0) + BUILTIN_VQW (BINOP, ssubl2, 0) + BUILTIN_VQW (BINOP, usubl2, 0) + BUILTIN_VQW (BINOP, saddw2, 0) + BUILTIN_VQW (BINOP, uaddw2, 0) + BUILTIN_VQW (BINOP, ssubw2, 0) + BUILTIN_VQW (BINOP, usubw2, 0) /* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>. */ - BUILTIN_VDW (BINOP, saddl) - BUILTIN_VDW (BINOP, uaddl) - BUILTIN_VDW (BINOP, ssubl) - BUILTIN_VDW (BINOP, usubl) + BUILTIN_VDW (BINOP, saddl, 0) + BUILTIN_VDW (BINOP, uaddl, 0) + BUILTIN_VDW (BINOP, ssubl, 0) + BUILTIN_VDW (BINOP, usubl, 0) /* Implemented by aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>. */ - BUILTIN_VDW (BINOP, saddw) - BUILTIN_VDW (BINOP, uaddw) - BUILTIN_VDW (BINOP, ssubw) - BUILTIN_VDW (BINOP, usubw) + BUILTIN_VDW (BINOP, saddw, 0) + BUILTIN_VDW (BINOP, uaddw, 0) + BUILTIN_VDW (BINOP, ssubw, 0) + BUILTIN_VDW (BINOP, usubw, 0) /* Implemented by aarch64_<sur>h<addsub><mode>. */ - BUILTIN_VQ_S (BINOP, shadd) - BUILTIN_VQ_S (BINOP, uhadd) - BUILTIN_VQ_S (BINOP, srhadd) - BUILTIN_VQ_S (BINOP, urhadd) + BUILTIN_VQ_S (BINOP, shadd, 0) + BUILTIN_VQ_S (BINOP, uhadd, 0) + BUILTIN_VQ_S (BINOP, srhadd, 0) + BUILTIN_VQ_S (BINOP, urhadd, 0) /* Implemented by aarch64_<sur><addsub>hn<mode>. */ - BUILTIN_VQN (BINOP, addhn) - BUILTIN_VQN (BINOP, raddhn) + BUILTIN_VQN (BINOP, addhn, 0) + BUILTIN_VQN (BINOP, raddhn, 0) /* Implemented by aarch64_<sur><addsub>hn2<mode>. */ - BUILTIN_VQN (TERNOP, addhn2) - BUILTIN_VQN (TERNOP, raddhn2) + BUILTIN_VQN (TERNOP, addhn2, 0) + BUILTIN_VQN (TERNOP, raddhn2, 0) - BUILTIN_VSQN_HSDI (UNOP, sqmovun) + BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0) /* Implemented by aarch64_<sur>qmovn<mode>. */ - BUILTIN_VSQN_HSDI (UNOP, sqmovn) - BUILTIN_VSQN_HSDI (UNOP, uqmovn) + BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) + BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) /* Implemented by aarch64_s<optab><mode>. */ - BUILTIN_VSDQ_I_BHSI (UNOP, sqabs) - BUILTIN_VSDQ_I_BHSI (UNOP, sqneg) + BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0) + BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0) - BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane) - BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane) - BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq) - BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq) - BUILTIN_VQ_HSI (TERNOP, sqdmlal2) - BUILTIN_VQ_HSI (TERNOP, sqdmlsl2) - BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane) - BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane) - BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq) - BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq) - BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n) - BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n) + BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlal_laneq, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_laneq, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlal2_lane, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_lane, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlal2_laneq, 0) + BUILTIN_VQ_HSI (QUADOP, sqdmlsl2_laneq, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0) /* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>. */ - BUILTIN_VSD_HSI (TERNOP, sqdmlal) - BUILTIN_VSD_HSI (TERNOP, sqdmlsl) + BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0) /* Implemented by aarch64_sqdml<SBINQOPS:as>l_n<mode>. */ - BUILTIN_VD_HSI (TERNOP, sqdmlal_n) - BUILTIN_VD_HSI (TERNOP, sqdmlsl_n) + BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0) + BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0) - BUILTIN_VSD_HSI (BINOP, sqdmull) - BUILTIN_VSD_HSI (TERNOP, sqdmull_lane) - BUILTIN_VD_HSI (TERNOP, sqdmull_laneq) - BUILTIN_VD_HSI (BINOP, sqdmull_n) - BUILTIN_VQ_HSI (BINOP, sqdmull2) - BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane) - BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq) - BUILTIN_VQ_HSI (BINOP, sqdmull2_n) + BUILTIN_VSD_HSI (BINOP, sqdmull, 0) + BUILTIN_VSD_HSI (TERNOP, sqdmull_lane, 0) + BUILTIN_VD_HSI (TERNOP, sqdmull_laneq, 0) + BUILTIN_VD_HSI (BINOP, sqdmull_n, 0) + BUILTIN_VQ_HSI (BINOP, sqdmull2, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmull2_lane, 0) + BUILTIN_VQ_HSI (TERNOP, sqdmull2_laneq, 0) + BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0) /* Implemented by aarch64_sq<r>dmulh<mode>. */ - BUILTIN_VSDQ_HSI (BINOP, sqdmulh) - BUILTIN_VSDQ_HSI (BINOP, sqrdmulh) + BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0) + BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0) /* Implemented by aarch64_sq<r>dmulh_lane<q><mode>. */ - BUILTIN_VDQHS (TERNOP, sqdmulh_lane) - BUILTIN_VDQHS (TERNOP, sqdmulh_laneq) - BUILTIN_VDQHS (TERNOP, sqrdmulh_lane) - BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq) - BUILTIN_SD_HSI (TERNOP, sqdmulh_lane) - BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane) + BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0) + BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0) + BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0) + BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0) + BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0) + BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0) - BUILTIN_VSDQ_I_DI (BINOP, sshl_n) - BUILTIN_VSDQ_I_DI (BINOP, ushl_n) + BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) /* Implemented by aarch64_<sur>shl<mode>. */ - BUILTIN_VSDQ_I_DI (BINOP, sshl) - BUILTIN_VSDQ_I_DI (BINOP, ushl) - BUILTIN_VSDQ_I_DI (BINOP, srshl) - BUILTIN_VSDQ_I_DI (BINOP, urshl) + BUILTIN_VSDQ_I_DI (BINOP, sshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, ushl, 0) + BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) + BUILTIN_VSDQ_I_DI (BINOP, urshl, 0) - BUILTIN_VSDQ_I_DI (SHIFTIMM, sshr_n) - BUILTIN_VSDQ_I_DI (SHIFTIMM, ushr_n) + BUILTIN_VSDQ_I_DI (SHIFTIMM, ashr, 3) + BUILTIN_VSDQ_I_DI (SHIFTIMM, lshr, 3) /* Implemented by aarch64_<sur>shr_n<mode>. */ - BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n) - BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n) + BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0) /* Implemented by aarch64_<sur>sra_n<mode>. */ - BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n) - BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n) - BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n) - BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n) + BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0) /* Implemented by aarch64_<sur>shll_n<mode>. */ - BUILTIN_VDW (SHIFTIMM, sshll_n) - BUILTIN_VDW (SHIFTIMM, ushll_n) + BUILTIN_VDW (SHIFTIMM, sshll_n, 0) + BUILTIN_VDW (SHIFTIMM, ushll_n, 0) /* Implemented by aarch64_<sur>shll2_n<mode>. */ - BUILTIN_VQW (SHIFTIMM, sshll2_n) - BUILTIN_VQW (SHIFTIMM, ushll2_n) + BUILTIN_VQW (SHIFTIMM, sshll2_n, 0) + BUILTIN_VQW (SHIFTIMM, ushll2_n, 0) /* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>. */ - BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n) - BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n) - BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n) - BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0) /* Implemented by aarch64_<sur>s<lr>i_n<mode>. */ - BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0) /* Implemented by aarch64_<sur>qshl<u>_n<mode>. */ - BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n) - BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n) - BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n) + BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0) /* Implemented by aarch64_cm<cmp><mode>. */ - BUILTIN_VSDQ_I_DI (BINOP, cmeq) - BUILTIN_VSDQ_I_DI (BINOP, cmge) - BUILTIN_VSDQ_I_DI (BINOP, cmgt) - BUILTIN_VSDQ_I_DI (BINOP, cmle) - BUILTIN_VSDQ_I_DI (BINOP, cmlt) + BUILTIN_VSDQ_I_DI (BINOP, cmeq, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmge, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmgt, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmle, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmlt, 0) /* Implemented by aarch64_cm<cmp><mode>. */ - BUILTIN_VSDQ_I_DI (BINOP, cmhs) - BUILTIN_VSDQ_I_DI (BINOP, cmhi) - BUILTIN_VSDQ_I_DI (BINOP, cmtst) + BUILTIN_VSDQ_I_DI (BINOP, cmhs, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmhi, 0) + BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0) /* Implemented by aarch64_<fmaxmin><mode>. */ - BUILTIN_VDQF (BINOP, fmax) - BUILTIN_VDQF (BINOP, fmin) - /* Implemented by aarch64_<maxmin><mode>. */ - BUILTIN_VDQ_BHSI (BINOP, smax) - BUILTIN_VDQ_BHSI (BINOP, smin) - BUILTIN_VDQ_BHSI (BINOP, umax) - BUILTIN_VDQ_BHSI (BINOP, umin) + BUILTIN_VDQF (BINOP, fmax, 0) + BUILTIN_VDQF (BINOP, fmin, 0) + + /* Implemented by <maxmin><mode>3. */ + BUILTIN_VDQ_BHSI (BINOP, smax, 3) + BUILTIN_VDQ_BHSI (BINOP, smin, 3) + BUILTIN_VDQ_BHSI (BINOP, umax, 3) + BUILTIN_VDQ_BHSI (BINOP, umin, 3) /* Implemented by aarch64_frint<frint_suffix><mode>. */ - BUILTIN_VDQF (UNOP, frintz) - BUILTIN_VDQF (UNOP, frintp) - BUILTIN_VDQF (UNOP, frintm) - BUILTIN_VDQF (UNOP, frinti) - BUILTIN_VDQF (UNOP, frintx) - BUILTIN_VDQF (UNOP, frinta) + BUILTIN_VDQF (UNOP, frintz, 0) + BUILTIN_VDQF (UNOP, frintp, 0) + BUILTIN_VDQF (UNOP, frintm, 0) + BUILTIN_VDQF (UNOP, frinti, 0) + BUILTIN_VDQF (UNOP, frintx, 0) + BUILTIN_VDQF (UNOP, frinta, 0) /* Implemented by aarch64_fcvt<frint_suffix><su><mode>. */ - BUILTIN_VDQF (UNOP, fcvtzs) - BUILTIN_VDQF (UNOP, fcvtzu) - BUILTIN_VDQF (UNOP, fcvtas) - BUILTIN_VDQF (UNOP, fcvtau) - BUILTIN_VDQF (UNOP, fcvtps) - BUILTIN_VDQF (UNOP, fcvtpu) - BUILTIN_VDQF (UNOP, fcvtms) - BUILTIN_VDQF (UNOP, fcvtmu) + BUILTIN_VDQF (UNOP, fcvtzs, 0) + BUILTIN_VDQF (UNOP, fcvtzu, 0) + BUILTIN_VDQF (UNOP, fcvtas, 0) + BUILTIN_VDQF (UNOP, fcvtau, 0) + BUILTIN_VDQF (UNOP, fcvtps, 0) + BUILTIN_VDQF (UNOP, fcvtpu, 0) + BUILTIN_VDQF (UNOP, fcvtms, 0) + BUILTIN_VDQF (UNOP, fcvtmu, 0) /* Implemented by aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */ - BUILTIN_VALL (BINOP, zip1) - BUILTIN_VALL (BINOP, zip2) - BUILTIN_VALL (BINOP, uzp1) - BUILTIN_VALL (BINOP, uzp2) - BUILTIN_VALL (BINOP, trn1) - BUILTIN_VALL (BINOP, trn2) + BUILTIN_VALL (BINOP, zip1, 0) + BUILTIN_VALL (BINOP, zip2, 0) + BUILTIN_VALL (BINOP, uzp1, 0) + BUILTIN_VALL (BINOP, uzp2, 0) + BUILTIN_VALL (BINOP, trn1, 0) + BUILTIN_VALL (BINOP, trn2, 0) + + /* Implemented by + aarch64_frecp<FRECP:frecp_suffix><mode>. */ + BUILTIN_GPF (UNOP, frecpe, 0) + BUILTIN_GPF (BINOP, frecps, 0) + BUILTIN_GPF (UNOP, frecpx, 0) + + BUILTIN_VDQF (UNOP, frecpe, 0) + BUILTIN_VDQF (BINOP, frecps, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 92dcfc0c57b..ad3f4a4a1f0 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -59,9 +59,9 @@ ; simd_fmul floating point multiply. ; simd_fmul_elt floating point multiply (by element). ; simd_fnegabs floating point neg/abs. -; simd_frcpe floating point reciprocal estimate. -; simd_frcps floating point reciprocal step. -; simd_frecx floating point reciprocal exponent. +; simd_frecpe floating point reciprocal estimate. +; simd_frecps floating point reciprocal step. +; simd_frecpx floating point reciprocal exponent. ; simd_frint floating point round to integer. ; simd_fsqrt floating point square root. ; simd_icvtf integer convert to floating point. @@ -163,9 +163,9 @@ simd_fmul,\ simd_fmul_elt,\ simd_fnegabs,\ - simd_frcpe,\ - simd_frcps,\ - simd_frecx,\ + simd_frecpe,\ + simd_frecps,\ + simd_frecpx,\ simd_frint,\ simd_fsqrt,\ simd_icvtf,\ @@ -305,8 +305,8 @@ (eq_attr "simd_type" "simd_store3,simd_store4") (const_string "neon_vst1_3_4_regs") (eq_attr "simd_type" "simd_store1s,simd_store2s") (const_string "neon_vst1_vst2_lane") (eq_attr "simd_type" "simd_store3s,simd_store4s") (const_string "neon_vst3_vst4_lane") - (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd") - (and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq") + (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd") + (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq") (eq_attr "simd_type" "none") (const_string "none") ] (const_string "unknown"))) @@ -2873,28 +2873,6 @@ (set_attr "simd_mode" "<MODE>")] ) -;; vshl_n - -(define_expand "aarch64_sshl_n<mode>" - [(match_operand:VSDQ_I_DI 0 "register_operand" "=w") - (match_operand:VSDQ_I_DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - "TARGET_SIMD" -{ - emit_insn (gen_ashl<mode>3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "aarch64_ushl_n<mode>" - [(match_operand:VSDQ_I_DI 0 "register_operand" "=w") - (match_operand:VSDQ_I_DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - "TARGET_SIMD" -{ - emit_insn (gen_ashl<mode>3 (operands[0], operands[1], operands[2])); - DONE; -}) - ;; vshll_n (define_insn "aarch64_<sur>shll_n<mode>" @@ -2939,28 +2917,6 @@ (set_attr "simd_mode" "<MODE>")] ) -;; vshr_n - -(define_expand "aarch64_sshr_n<mode>" - [(match_operand:VSDQ_I_DI 0 "register_operand" "=w") - (match_operand:VSDQ_I_DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - "TARGET_SIMD" -{ - emit_insn (gen_ashr<mode>3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "aarch64_ushr_n<mode>" - [(match_operand:VSDQ_I_DI 0 "register_operand" "=w") - (match_operand:VSDQ_I_DI 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - "TARGET_SIMD" -{ - emit_insn (gen_lshr<mode>3 (operands[0], operands[1], operands[2])); - DONE; -}) - ;; vrshr_n (define_insn "aarch64_<sur>shr_n<mode>" @@ -3117,19 +3073,6 @@ (set_attr "simd_mode" "DI")] ) -;; v(max|min) - -(define_expand "aarch64_<maxmin><mode>" - [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") - (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w") - (match_operand:VDQ_BHSI 2 "register_operand" "w")))] - "TARGET_SIMD" -{ - emit_insn (gen_<maxmin><mode>3 (operands[0], operands[1], operands[2])); - DONE; -}) - - (define_insn "aarch64_<fmaxmin><mode>" [(set (match_operand:VDQF 0 "register_operand" "=w") (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") @@ -3152,16 +3095,6 @@ (set_attr "simd_mode" "<MODE>")] ) -(define_expand "aarch64_sqrt<mode>" - [(match_operand:VDQF 0 "register_operand" "=w") - (match_operand:VDQF 1 "register_operand" "w")] - "TARGET_SIMD" -{ - emit_insn (gen_sqrt<mode>2 (operands[0], operands[1])); - DONE; -}) - - ;; Patterns for vector struct loads and stores. (define_insn "vec_load_lanesoi<mode>" @@ -3726,3 +3659,25 @@ "ld1r\\t{%0.<Vtype>}, %1" [(set_attr "simd_type" "simd_load1r") (set_attr "simd_mode" "<MODE>")]) + +(define_insn "aarch64_frecpe<mode>" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] + UNSPEC_FRECPE))] + "TARGET_SIMD" + "frecpe\\t%0.<Vtype>, %1.<Vtype>" + [(set_attr "simd_type" "simd_frecpe") + (set_attr "simd_mode" "<MODE>")] +) + +(define_insn "aarch64_frecps<mode>" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w") + (match_operand:VDQF 2 "register_operand" "w")] + UNSPEC_FRECPS))] + "TARGET_SIMD" + "frecps\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" + [(set_attr "simd_type" "simd_frecps") + (set_attr "simd_mode" "<MODE>")] +) + diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index c593596daba..1b1d7ea3169 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -68,6 +68,9 @@ (define_c_enum "unspec" [ UNSPEC_CASESI UNSPEC_CLS + UNSPEC_FRECPE + UNSPEC_FRECPS + UNSPEC_FRECPX UNSPEC_FRINTA UNSPEC_FRINTI UNSPEC_FRINTM @@ -230,6 +233,9 @@ fmovf2i,\ fmovi2f,\ fmul,\ + frecpe,\ + frecps,\ + frecpx,\ frint,\ fsqrt,\ load_acq,\ @@ -3313,6 +3319,27 @@ (set_attr "mode" "<MODE>")] ) +(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] + FRECP))] + "TARGET_FLOAT" + "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1" + [(set_attr "v8type" "frecp<FRECP:frecp_suffix>") + (set_attr "mode" "<MODE>")] +) + +(define_insn "aarch64_frecps<mode>" + [(set (match_operand:GPF 0 "register_operand" "=w") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w") + (match_operand:GPF 2 "register_operand" "w")] + UNSPEC_FRECPS))] + "TARGET_FLOAT" + "frecps\\t%<s>0, %<s>1, %<s>2" + [(set_attr "v8type" "frecps") + (set_attr "mode" "<MODE>")] +) + ;; ------------------------------------------------------------------- ;; Reload support ;; ------------------------------------------------------------------- diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index ca474033e1d..5e25c778111 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -14556,17 +14556,6 @@ vrbitq_u8 (uint8x16_t a) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrecpe_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("frecpe %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vrecpe_u32 (uint32x2_t a) { @@ -14578,39 +14567,6 @@ vrecpe_u32 (uint32x2_t a) return result; } -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vrecped_f64 (float64_t a) -{ - float64_t result; - __asm__ ("frecpe %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrecpeq_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("frecpe %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrecpeq_f64 (float64x2_t a) -{ - float64x2_t result; - __asm__ ("frecpe %0.2d,%1.2d" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vrecpeq_u32 (uint32x4_t a) { @@ -14622,94 +14578,6 @@ vrecpeq_u32 (uint32x4_t a) return result; } -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vrecpes_f32 (float32_t a) -{ - float32_t result; - __asm__ ("frecpe %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrecps_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("frecps %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vrecpsd_f64 (float64_t a, float64_t b) -{ - float64_t result; - __asm__ ("frecps %d0,%d1,%d2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrecpsq_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("frecps %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vrecpsq_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("frecps %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vrecpss_f32 (float32_t a, float32_t b) -{ - float32_t result; - __asm__ ("frecps %s0,%s1,%s2" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -vrecpxd_f64 (float64_t a) -{ - float64_t result; - __asm__ ("frecpe %d0,%d1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -vrecpxs_f32 (float32_t a) -{ - float32_t result; - __asm__ ("frecpe %s0,%s1" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vrev16_p8 (poly8x8_t a) { @@ -23115,6 +22983,84 @@ vqsubd_u64 (uint64x1_t __a, uint64x1_t __b) return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b); } +/* vrecpe */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpes_f32 (float32_t __a) +{ + return __builtin_aarch64_frecpesf (__a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecped_f64 (float64_t __a) +{ + return __builtin_aarch64_frecpedf (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecpe_f32 (float32x2_t __a) +{ + return __builtin_aarch64_frecpev2sf (__a); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpeq_f32 (float32x4_t __a) +{ + return __builtin_aarch64_frecpev4sf (__a); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrecpeq_f64 (float64x2_t __a) +{ + return __builtin_aarch64_frecpev2df (__a); +} + +/* vrecps */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpss_f32 (float32_t __a, float32_t __b) +{ + return __builtin_aarch64_frecpssf (__a, __b); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecpsd_f64 (float64_t __a, float64_t __b) +{ + return __builtin_aarch64_frecpsdf (__a, __b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrecps_f32 (float32x2_t __a, float32x2_t __b) +{ + return __builtin_aarch64_frecpsv2sf (__a, __b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrecpsq_f32 (float32x4_t __a, float32x4_t __b) +{ + return __builtin_aarch64_frecpsv4sf (__a, __b); +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vrecpsq_f64 (float64x2_t __a, float64x2_t __b) +{ + return __builtin_aarch64_frecpsv2df (__a, __b); +} + +/* vrecpx */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vrecpxs_f32 (float32_t __a) +{ + return __builtin_aarch64_frecpxsf (__a); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vrecpxd_f64 (float64_t __a) +{ + return __builtin_aarch64_frecpxdf (__a); +} + /* vrshl */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -23458,109 +23404,109 @@ vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vshl_n_s8 (int8x8_t __a, const int __b) { - return (int8x8_t) __builtin_aarch64_sshl_nv8qi (__a, __b); + return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b); } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vshl_n_s16 (int16x4_t __a, const int __b) { - return (int16x4_t) __builtin_aarch64_sshl_nv4hi (__a, __b); + return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b); } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vshl_n_s32 (int32x2_t __a, const int __b) { - return (int32x2_t) __builtin_aarch64_sshl_nv2si (__a, __b); + return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vshl_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b); + return (int64x1_t) __builtin_aarch64_ashldi (__a, __b); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vshl_n_u8 (uint8x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_ushl_nv8qi ((int8x8_t) __a, __b); + return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vshl_n_u16 (uint16x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_ushl_nv4hi ((int16x4_t) __a, __b); + return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vshl_n_u32 (uint32x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_ushl_nv2si ((int32x2_t) __a, __b); + return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vshl_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_ushl_ndi ((int64x1_t) __a, __b); + return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vshlq_n_s8 (int8x16_t __a, const int __b) { - return (int8x16_t) __builtin_aarch64_sshl_nv16qi (__a, __b); + return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b); } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vshlq_n_s16 (int16x8_t __a, const int __b) { - return (int16x8_t) __builtin_aarch64_sshl_nv8hi (__a, __b); + return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vshlq_n_s32 (int32x4_t __a, const int __b) { - return (int32x4_t) __builtin_aarch64_sshl_nv4si (__a, __b); + return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vshlq_n_s64 (int64x2_t __a, const int __b) { - return (int64x2_t) __builtin_aarch64_sshl_nv2di (__a, __b); + return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vshlq_n_u8 (uint8x16_t __a, const int __b) { - return (uint8x16_t) __builtin_aarch64_ushl_nv16qi ((int8x16_t) __a, __b); + return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vshlq_n_u16 (uint16x8_t __a, const int __b) { - return (uint16x8_t) __builtin_aarch64_ushl_nv8hi ((int16x8_t) __a, __b); + return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vshlq_n_u32 (uint32x4_t __a, const int __b) { - return (uint32x4_t) __builtin_aarch64_ushl_nv4si ((int32x4_t) __a, __b); + return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vshlq_n_u64 (uint64x2_t __a, const int __b) { - return (uint64x2_t) __builtin_aarch64_ushl_nv2di ((int64x2_t) __a, __b); + return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vshld_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) __builtin_aarch64_sshl_ndi (__a, __b); + return (int64x1_t) __builtin_aarch64_ashldi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vshld_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_ushl_ndi (__a, __b); + return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b); } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -23748,109 +23694,109 @@ vshll_n_u32 (uint32x2_t __a, const int __b) __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vshr_n_s8 (int8x8_t __a, const int __b) { - return (int8x8_t) __builtin_aarch64_sshr_nv8qi (__a, __b); + return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b); } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vshr_n_s16 (int16x4_t __a, const int __b) { - return (int16x4_t) __builtin_aarch64_sshr_nv4hi (__a, __b); + return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b); } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vshr_n_s32 (int32x2_t __a, const int __b) { - return (int32x2_t) __builtin_aarch64_sshr_nv2si (__a, __b); + return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vshr_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b); + return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vshr_n_u8 (uint8x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_ushr_nv8qi ((int8x8_t) __a, __b); + return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vshr_n_u16 (uint16x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_ushr_nv4hi ((int16x4_t) __a, __b); + return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vshr_n_u32 (uint32x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_ushr_nv2si ((int32x2_t) __a, __b); + return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vshr_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_ushr_ndi ((int64x1_t) __a, __b); + return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vshrq_n_s8 (int8x16_t __a, const int __b) { - return (int8x16_t) __builtin_aarch64_sshr_nv16qi (__a, __b); + return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b); } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vshrq_n_s16 (int16x8_t __a, const int __b) { - return (int16x8_t) __builtin_aarch64_sshr_nv8hi (__a, __b); + return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vshrq_n_s32 (int32x4_t __a, const int __b) { - return (int32x4_t) __builtin_aarch64_sshr_nv4si (__a, __b); + return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vshrq_n_s64 (int64x2_t __a, const int __b) { - return (int64x2_t) __builtin_aarch64_sshr_nv2di (__a, __b); + return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vshrq_n_u8 (uint8x16_t __a, const int __b) { - return (uint8x16_t) __builtin_aarch64_ushr_nv16qi ((int8x16_t) __a, __b); + return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vshrq_n_u16 (uint16x8_t __a, const int __b) { - return (uint16x8_t) __builtin_aarch64_ushr_nv8hi ((int16x8_t) __a, __b); + return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vshrq_n_u32 (uint32x4_t __a, const int __b) { - return (uint32x4_t) __builtin_aarch64_ushr_nv4si ((int32x4_t) __a, __b); + return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vshrq_n_u64 (uint64x2_t __a, const int __b) { - return (uint64x2_t) __builtin_aarch64_ushr_nv2di ((int64x2_t) __a, __b); + return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vshrd_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) __builtin_aarch64_sshr_ndi (__a, __b); + return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vshrd_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_ushr_ndi (__a, __b); + return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b); } /* vsli */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 863a4af0346..017e1281bbb 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -698,6 +698,8 @@ (define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM UNSPEC_FRINTA]) +(define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX]) + ;; ------------------------------------------------------------------- ;; Int Iterators Attributes. ;; ------------------------------------------------------------------- @@ -803,3 +805,5 @@ (define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")]) + +(define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")]) |