author    Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>  2013-05-14 09:30:32 +0000
committer Matthew Gretton-Dann <matthew.gretton-dann@linaro.org>  2013-05-14 09:30:32 +0000
commit    a6002864cd18f9049b5511293d4d84a6a4ea4e73 (patch)
tree      4ac0a2e47b7948b71a66fd00c4d1ba1bfef2fbb6 /gcc/config
parent    e118fade5f65318cb59db4b7593d0019e2302ed2 (diff)
Backport from trunk r198490-198496.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_8-branch@198868 138bc75d-0d04-0410-961f-82ee72b054a4
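
In short, the backported revisions rework the AArch64 vector comparison support: the cm*/fcm* patterns move from unspecs to canonical (neg (comparison ...)) RTL, the unsigned cmhs/cmhi builtins are renamed cmgeu/cmgtu, the comparison builtins are extended to floating-point and DI modes, and the inline-asm comparison intrinsics in arm_neon.h are replaced by builtin- and C-based implementations. A minimal sketch of the -1/0 mask convention all of these intrinsics share (illustration only, not part of the patch):

  #include <arm_neon.h>

  /* Each result lane is all-ones where the comparison holds,
     all-zeros where it does not.  */
  uint32x2_t
  example (float32x2_t a, float32x2_t b)
  {
    return vcge_f32 (a, b);  /* lane i = (a[i] >= b[i]) ? 0xffffffff : 0 */
  }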
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c        |   20
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def |   14
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md           |  289
-rw-r--r--  gcc/config/aarch64/aarch64.md                |    2
-rw-r--r--  gcc/config/aarch64/arm_neon.h                | 1736
-rw-r--r--  gcc/config/aarch64/iterators.md              |   64
-rw-r--r--  gcc/config/aarch64/predicates.md             |    5
7 files changed, 1545 insertions, 585 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 53d2c6ad557..3016f256869 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -191,6 +191,9 @@ typedef struct
#define BUILTIN_VALL(T, N, MAP) \
VAR10 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
v4si, v2di, v2sf, v4sf, v2df)
+#define BUILTIN_VALLDI(T, N, MAP) \
+ VAR11 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, \
+ v4si, v2di, v2sf, v4sf, v2df, di)
#define BUILTIN_VB(T, N, MAP) \
VAR2 (T, N, MAP, v8qi, v16qi)
#define BUILTIN_VD(T, N, MAP) \
@@ -1314,11 +1317,26 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
BUILTIN_VDQF (UNOP, abs, 2)
return fold_build1 (ABS_EXPR, type, args[0]);
break;
+ BUILTIN_VALLDI (BINOP, cmge, 0)
+ return fold_build2 (GE_EXPR, type, args[0], args[1]);
+ break;
+ BUILTIN_VALLDI (BINOP, cmgt, 0)
+ return fold_build2 (GT_EXPR, type, args[0], args[1]);
+ break;
+ BUILTIN_VALLDI (BINOP, cmeq, 0)
+ return fold_build2 (EQ_EXPR, type, args[0], args[1]);
+ break;
+ BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
+ {
+ tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]);
+ tree vec_zero_node = build_zero_cst (type);
+ return fold_build2 (NE_EXPR, type, and_node, vec_zero_node);
+ break;
+ }
VAR1 (UNOP, floatv2si, 2, v2sf)
VAR1 (UNOP, floatv4si, 2, v4sf)
VAR1 (UNOP, floatv2di, 2, v2df)
return fold_build1 (FLOAT_EXPR, type, args[0]);
- break;
default:
break;
}
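
With the fold rules above, the cmeq/cmge/cmgt builtins are lowered to ordinary EQ_EXPR/GE_EXPR/GT_EXPR trees, and cmtst to a masked inequality, so the middle end can simplify them like any other comparison. A hedged sketch of what the cmtst fold corresponds to at the source level (names here are illustrative, not from the patch):

  #include <arm_neon.h>

  /* Lane-wise (a & b) != 0, using GCC's vector extensions:
     -1 in lanes where a and b share a set bit, 0 elsewhere.  */
  uint32x2_t
  tst_sketch (int32x2_t a, int32x2_t b)
  {
    int32x2_t zero = {0, 0};
    return (uint32x2_t) ((a & b) != zero);
  }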
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 6093341b199..620406b449d 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -224,14 +224,14 @@
BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0)
/* Implemented by aarch64_cm<cmp><mode>. */
- BUILTIN_VSDQ_I_DI (BINOP, cmeq, 0)
- BUILTIN_VSDQ_I_DI (BINOP, cmge, 0)
- BUILTIN_VSDQ_I_DI (BINOP, cmgt, 0)
- BUILTIN_VSDQ_I_DI (BINOP, cmle, 0)
- BUILTIN_VSDQ_I_DI (BINOP, cmlt, 0)
+ BUILTIN_VALLDI (BINOP, cmeq, 0)
+ BUILTIN_VALLDI (BINOP, cmge, 0)
+ BUILTIN_VALLDI (BINOP, cmgt, 0)
+ BUILTIN_VALLDI (BINOP, cmle, 0)
+ BUILTIN_VALLDI (BINOP, cmlt, 0)
/* Implemented by aarch64_cm<cmp><mode>. */
- BUILTIN_VSDQ_I_DI (BINOP, cmhs, 0)
- BUILTIN_VSDQ_I_DI (BINOP, cmhi, 0)
+ BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0)
+ BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
/* Implemented by aarch64_<fmaxmin><mode>. */
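
The net effect of this hunk: the signed comparisons (and cmle/cmlt) now expand over all vector modes plus DI via BUILTIN_VALLDI, while the unsigned pair is renamed to match the new cmgeu/cmgtu pattern names. A hedged sketch of one newly generated float variant, as arm_neon.h uses it further down (illustration only):

  #include <arm_neon.h>

  /* BUILTIN_VALLDI (BINOP, cmeq, 0) now yields __builtin_aarch64_cmeqv2sf,
     the builtin behind the rewritten vceq_f32.  */
  uint32x2_t
  eq_sketch (float32x2_t a, float32x2_t b)
  {
    return (uint32x2_t) __builtin_aarch64_cmeqv2sf (a, b);
  }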
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index a88d4cca05d..2b252540519 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -21,7 +21,7 @@
; Main data types used by the instructions
-(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,HI,QI"
+(define_attr "simd_mode" "unknown,none,V8QI,V16QI,V4HI,V8HI,V2SI,V4SI,V2DI,V2SF,V4SF,V2DF,OI,CI,XI,DI,DF,SI,SF,HI,QI"
(const_string "unknown"))
@@ -1683,11 +1683,13 @@
(match_operator 3 "comparison_operator"
[(match_operand:VDQ 4 "register_operand")
(match_operand:VDQ 5 "nonmemory_operand")])
- (match_operand:VDQ 1 "register_operand")
- (match_operand:VDQ 2 "register_operand")))]
+ (match_operand:VDQ 1 "nonmemory_operand")
+ (match_operand:VDQ 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
int inverse = 0, has_zero_imm_form = 0;
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
rtx mask = gen_reg_rtx (<MODE>mode);
switch (GET_CODE (operands[3]))
@@ -1728,12 +1730,12 @@
case LTU:
case GEU:
- emit_insn (gen_aarch64_cmhs<mode> (mask, operands[4], operands[5]));
+ emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
break;
case LEU:
case GTU:
- emit_insn (gen_aarch64_cmhi<mode> (mask, operands[4], operands[5]));
+ emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
break;
case NE:
@@ -1746,11 +1748,26 @@
}
if (inverse)
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
- operands[1]));
- else
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[1],
- operands[2]));
+ {
+ op1 = operands[2];
+ op2 = operands[1];
+ }
+
+ /* If we have (a = (b CMP c) ? -1 : 0),
+ then we can simply move the generated mask. */
+
+ if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
+ && op2 == CONST0_RTX (<V_cmp_result>mode))
+ emit_move_insn (operands[0], mask);
+ else
+ {
+ if (!REG_P (op1))
+ op1 = force_reg (<MODE>mode, op1);
+ if (!REG_P (op2))
+ op2 = force_reg (<MODE>mode, op2);
+ emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
+ op1, op2));
+ }
DONE;
})
@@ -1761,12 +1778,14 @@
(match_operator 3 "comparison_operator"
[(match_operand:VDQF 4 "register_operand")
(match_operand:VDQF 5 "nonmemory_operand")])
- (match_operand:VDQF 1 "register_operand")
- (match_operand:VDQF 2 "register_operand")))]
+ (match_operand:VDQF 1 "nonmemory_operand")
+ (match_operand:VDQF 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
int inverse = 0;
int swap_bsl_operands = 0;
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
rtx mask = gen_reg_rtx (<V_cmp_result>mode);
rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
@@ -1888,11 +1907,27 @@
}
if (swap_bsl_operands)
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
- operands[1]));
- else
- emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[1],
- operands[2]));
+ {
+ op1 = operands[2];
+ op2 = operands[1];
+ }
+
+ /* If we have (a = (b CMP c) ? -1 : 0),
+ then we can simply move the generated mask. */
+
+ if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
+ && op2 == CONST0_RTX (<V_cmp_result>mode))
+ emit_move_insn (operands[0], mask);
+ else
+ {
+ if (!REG_P (op1))
+ op1 = force_reg (<MODE>mode, op1);
+ if (!REG_P (op2))
+ op2 = force_reg (<MODE>mode, op2);
+ emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
+ op1, op2));
+ }
+
DONE;
})
@@ -1902,8 +1937,8 @@
(match_operator 3 "comparison_operator"
[(match_operand:VALL 4 "register_operand")
(match_operand:VALL 5 "nonmemory_operand")])
- (match_operand:VALL 1 "register_operand")
- (match_operand:VALL 2 "register_operand")))]
+ (match_operand:VALL 1 "nonmemory_operand")
+ (match_operand:VALL 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
@@ -1912,6 +1947,22 @@
DONE;
})
+(define_expand "vcond<v_cmp_result><mode>"
+ [(set (match_operand:<V_cmp_result> 0 "register_operand")
+ (if_then_else:<V_cmp_result>
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VDQF 4 "register_operand")
+ (match_operand:VDQF 5 "nonmemory_operand")])
+ (match_operand:<V_cmp_result> 1 "nonmemory_operand")
+ (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
+ "TARGET_SIMD"
+{
+ emit_insn (gen_aarch64_vcond_internal<v_cmp_result> (
+ operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4], operands[5]));
+ DONE;
+})
(define_expand "vcondu<mode><mode>"
[(set (match_operand:VDQ 0 "register_operand")
@@ -1919,8 +1970,8 @@
(match_operator 3 "comparison_operator"
[(match_operand:VDQ 4 "register_operand")
(match_operand:VDQ 5 "nonmemory_operand")])
- (match_operand:VDQ 1 "register_operand")
- (match_operand:VDQ 2 "register_operand")))]
+ (match_operand:VDQ 1 "nonmemory_operand")
+ (match_operand:VDQ 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
@@ -3146,48 +3197,198 @@
)
-;; cm(eq|ge|le|lt|gt)
+;; cm(eq|ge|gt|lt|le)
+;; Note, we have constraints for Dz and Z as different expanders
+;; have different ideas of what should be passed to this pattern.
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
- (unspec:<V_cmp_result>
- [(match_operand:VSDQ_I_DI 1 "register_operand" "w,w")
- (match_operand:VSDQ_I_DI 2 "aarch64_simd_reg_or_zero" "w,Z")]
- VCMP_S))]
+ (neg:<V_cmp_result>
+ (COMPARISONS:<V_cmp_result>
+ (match_operand:VDQ 1 "register_operand" "w,w")
+ (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
+ )))]
"TARGET_SIMD"
"@
- cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
- cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
+ cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
+ cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
[(set_attr "simd_type" "simd_cmp")
(set_attr "simd_mode" "<MODE>")]
)
-;; cm(hs|hi|tst)
+(define_insn_and_split "aarch64_cm<optab>di"
+ [(set (match_operand:DI 0 "register_operand" "=w,w,r")
+ (neg:DI
+ (COMPARISONS:DI
+ (match_operand:DI 1 "register_operand" "w,w,r")
+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
+ )))]
+ "TARGET_SIMD"
+ "@
+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
+ cm<optab>\t%d0, %d1, #0
+ #"
+ "reload_completed
+ /* We need to prevent the split from
+ happening in the 'w' constraint cases. */
+ && GP_REGNUM_P (REGNO (operands[0]))
+ && GP_REGNUM_P (REGNO (operands[1]))"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (neg:DI
+ (COMPARISONS:DI
+ (match_operand 3 "cc_register" "")
+ (const_int 0))))]
+ {
+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+ DONE;
+ }
+ [(set_attr "simd_type" "simd_cmp")
+ (set_attr "simd_mode" "DI")]
+)
+
+;; cm(hs|hi)
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
- (unspec:<V_cmp_result>
- [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
- (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
- VCMP_U))]
+ (neg:<V_cmp_result>
+ (UCOMPARISONS:<V_cmp_result>
+ (match_operand:VDQ 1 "register_operand" "w")
+ (match_operand:VDQ 2 "register_operand" "w")
+ )))]
"TARGET_SIMD"
- "cm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+ "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
[(set_attr "simd_type" "simd_cmp")
(set_attr "simd_mode" "<MODE>")]
)
-;; fcm(eq|ge|le|lt|gt)
+(define_insn_and_split "aarch64_cm<optab>di"
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
+ (neg:DI
+ (UCOMPARISONS:DI
+ (match_operand:DI 1 "register_operand" "w,r")
+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
+ )))]
+ "TARGET_SIMD"
+ "@
+ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
+ #"
+ "reload_completed
+ /* We need to prevent the split from
+ happening in the 'w' constraint cases. */
+ && GP_REGNUM_P (REGNO (operands[0]))
+ && GP_REGNUM_P (REGNO (operands[1]))"
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC
+ (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (neg:DI
+ (UCOMPARISONS:DI
+ (match_operand 3 "cc_register" "")
+ (const_int 0))))]
+ {
+ enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
+ rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+ rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+ DONE;
+ }
+ [(set_attr "simd_type" "simd_cmp")
+ (set_attr "simd_mode" "DI")]
+)
+
+;; cmtst
+
+(define_insn "aarch64_cmtst<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
+ (neg:<V_cmp_result>
+ (ne:<V_cmp_result>
+ (and:VDQ
+ (match_operand:VDQ 1 "register_operand" "w")
+ (match_operand:VDQ 2 "register_operand" "w"))
+ (vec_duplicate:<V_cmp_result> (const_int 0)))))]
+ "TARGET_SIMD"
+ "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
+ [(set_attr "simd_type" "simd_cmp")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn_and_split "aarch64_cmtstdi"
+ [(set (match_operand:DI 0 "register_operand" "=w,r")
+ (neg:DI
+ (ne:DI
+ (and:DI
+ (match_operand:DI 1 "register_operand" "w,r")
+ (match_operand:DI 2 "register_operand" "w,r"))
+ (const_int 0))))]
+ "TARGET_SIMD"
+ "@
+ cmtst\t%d0, %d1, %d2
+ #"
+ "reload_completed
+ /* We need to prevent the split from
+ happening in the 'w' constraint cases. */
+ && GP_REGNUM_P (REGNO (operands[0]))
+ && GP_REGNUM_P (REGNO (operands[1]))"
+ [(set (reg:CC_NZ CC_REGNUM)
+ (compare:CC_NZ
+ (and:DI (match_dup 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0)
+ (neg:DI
+ (ne:DI
+ (match_operand 3 "cc_register" "")
+ (const_int 0))))]
+ {
+ rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
+ enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
+ rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
+ rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+ DONE;
+ }
+ [(set_attr "simd_type" "simd_cmp")
+ (set_attr "simd_mode" "DI")]
+)
+
+;; fcm(eq|ge|gt|le|lt)
-(define_insn "aarch64_cm<cmp><mode>"
+(define_insn "aarch64_cm<optab><mode>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
- (unspec:<V_cmp_result>
- [(match_operand:VDQF 1 "register_operand" "w,w")
- (match_operand:VDQF 2 "aarch64_simd_reg_or_zero" "w,Dz")]
- VCMP_S))]
+ (neg:<V_cmp_result>
+ (COMPARISONS:<V_cmp_result>
+ (match_operand:VALLF 1 "register_operand" "w,w")
+ (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
+ )))]
"TARGET_SIMD"
"@
- fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
- fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
+ fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
+ fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
+ [(set_attr "simd_type" "simd_fcmp")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+;; fac(ge|gt)
+;; Note we can also handle what would be fac(le|lt) by
+;; generating fac(ge|gt).
+
+(define_insn "*aarch64_fac<optab><mode>"
+ [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
+ (neg:<V_cmp_result>
+ (FAC_COMPARISONS:<V_cmp_result>
+ (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
+ (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
+ )))]
+ "TARGET_SIMD"
+ "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
[(set_attr "simd_type" "simd_fcmp")
(set_attr "simd_mode" "<MODE>")]
)
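
The DI-mode define_insn_and_split patterns above cover both register files: when the operands live in SIMD registers the cm*/cmtst forms are emitted directly, and after reload the general-register alternatives split into a compare against CC_REGNUM followed by a negated cstore. A sketch of the two expected sequences for one comparison (illustration only; the exact code depends on register allocation):

  /* All-ones/all-zeros mask, matching the intrinsic convention.  */
  uint64_t
  gt_mask (int64_t a, int64_t b)
  {
    return a > b ? -1ull : 0;
    /* Expected: SIMD regs    -> cmgt  d0, d1, d2
       general regs           -> cmp   x1, x2
                                 csetm x0, gt  */
  }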
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 08f86cf10fb..80e202a88cd 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -2409,7 +2409,7 @@
(set_attr "mode" "SI")]
)
-(define_insn "*cstore<mode>_neg"
+(define_insn "cstore<mode>_neg"
[(set (match_operand:ALLI 0 "register_operand" "=r")
(neg:ALLI (match_operator:ALLI 1 "aarch64_comparison_operator"
[(match_operand 2 "cc_register" "") (const_int 0)])))]
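
Dropping the leading '*' turns cstore<mode>_neg into a named pattern, which is what lets the new DI splitters in aarch64-simd.md emit it directly:

  /* As used by the aarch64_cm*di splitters above.  */
  emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));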
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 8db224930fd..29e4d64e052 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -4545,50 +4545,6 @@ vabsq_s64 (int64x2_t a)
return result;
}
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vacged_f64 (float64_t a, float64_t b)
-{
- float64_t result;
- __asm__ ("facge %d0,%d1,%d2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vacges_f32 (float32_t a, float32_t b)
-{
- float32_t result;
- __asm__ ("facge %s0,%s1,%s2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vacgtd_f64 (float64_t a, float64_t b)
-{
- float64_t result;
- __asm__ ("facgt %d0,%d1,%d2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vacgts_f32 (float32_t a, float32_t b)
-{
- float32_t result;
- __asm__ ("facgt %s0,%s1,%s2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlv_s8 (int8x8_t a)
{
@@ -5062,358 +5018,6 @@ vbslq_u64 (uint64x2_t a, uint64x2_t b, uint64x2_t c)
return result;
}
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vcage_f32 (float32x2_t a, float32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("facge %0.2s, %1.2s, %2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vcageq_f32 (float32x4_t a, float32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("facge %0.4s, %1.4s, %2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vcageq_f64 (float64x2_t a, float64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("facge %0.2d, %1.2d, %2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vcagt_f32 (float32x2_t a, float32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("facgt %0.2s, %1.2s, %2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vcagtq_f32 (float32x4_t a, float32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("facgt %0.4s, %1.4s, %2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vcagtq_f64 (float64x2_t a, float64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("facgt %0.2d, %1.2d, %2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vcale_f32 (float32x2_t a, float32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("facge %0.2s, %2.2s, %1.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vcaleq_f32 (float32x4_t a, float32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("facge %0.4s, %2.4s, %1.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vcaleq_f64 (float64x2_t a, float64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("facge %0.2d, %2.2d, %1.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vcalt_f32 (float32x2_t a, float32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("facgt %0.2s, %2.2s, %1.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vcaltq_f32 (float32x4_t a, float32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("facgt %0.4s, %2.4s, %1.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vcaltq_f64 (float64x2_t a, float64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("facgt %0.2d, %2.2d, %1.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vceq_f32 (float32x2_t a, float32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("fcmeq %0.2s, %1.2s, %2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vceq_f64 (float64x1_t a, float64x1_t b)
-{
- uint64x1_t result;
- __asm__ ("fcmeq %d0, %d1, %d2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vceqd_f64 (float64_t a, float64_t b)
-{
- float64_t result;
- __asm__ ("fcmeq %d0,%d1,%d2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vceqq_f32 (float32x4_t a, float32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("fcmeq %0.4s, %1.4s, %2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vceqq_f64 (float64x2_t a, float64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("fcmeq %0.2d, %1.2d, %2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vceqs_f32 (float32_t a, float32_t b)
-{
- float32_t result;
- __asm__ ("fcmeq %s0,%s1,%s2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float64_t __attribute__ ((__always_inline__))
-vceqzd_f64 (float64_t a)
-{
- float64_t result;
- __asm__ ("fcmeq %d0,%d1,#0"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline float32_t __attribute__ ((__always_inline__))
-vceqzs_f32 (float32_t a)
-{
- float32_t result;
- __asm__ ("fcmeq %s0,%s1,#0"
- : "=w"(result)
- : "w"(a)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vcge_f32 (float32x2_t a, float32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("fcmge %0.2s, %1.2s, %2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcge_f64 (float64x1_t a, float64x1_t b)
-{
- uint64x1_t result;
- __asm__ ("fcmge %d0, %d1, %d2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vcgeq_f32 (float32x4_t a, float32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("fcmge %0.4s, %1.4s, %2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vcgeq_f64 (float64x2_t a, float64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("fcmge %0.2d, %1.2d, %2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vcgt_f32 (float32x2_t a, float32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("fcmgt %0.2s, %1.2s, %2.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcgt_f64 (float64x1_t a, float64x1_t b)
-{
- uint64x1_t result;
- __asm__ ("fcmgt %d0, %d1, %d2"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vcgtq_f32 (float32x4_t a, float32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("fcmgt %0.4s, %1.4s, %2.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vcgtq_f64 (float64x2_t a, float64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("fcmgt %0.2d, %1.2d, %2.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vcle_f32 (float32x2_t a, float32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("fcmge %0.2s, %2.2s, %1.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vcle_f64 (float64x1_t a, float64x1_t b)
-{
- uint64x1_t result;
- __asm__ ("fcmge %d0, %d2, %d1"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vcleq_f32 (float32x4_t a, float32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("fcmge %0.4s, %2.4s, %1.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vcleq_f64 (float64x2_t a, float64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("fcmge %0.2d, %2.2d, %1.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcls_s8 (int8x8_t a)
{
@@ -5480,50 +5084,6 @@ vclsq_s32 (int32x4_t a)
return result;
}
-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-vclt_f32 (float32x2_t a, float32x2_t b)
-{
- uint32x2_t result;
- __asm__ ("fcmgt %0.2s, %2.2s, %1.2s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-vclt_f64 (float64x1_t a, float64x1_t b)
-{
- uint64x1_t result;
- __asm__ ("fcmgt %d0, %d2, %d1"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-vcltq_f32 (float32x4_t a, float32x4_t b)
-{
- uint32x4_t result;
- __asm__ ("fcmgt %0.4s, %2.4s, %1.4s"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-vcltq_f64 (float64x2_t a, float64x2_t b)
-{
- uint64x2_t result;
- __asm__ ("fcmgt %0.2d, %2.2d, %1.2d"
- : "=w"(result)
- : "w"(a), "w"(b)
- : /* No clobbers */);
- return result;
-}
-
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t a)
{
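
The inline-asm comparison intrinsics deleted above are not lost: they are re-added further down this header as C implementations built on the new builtins and on GCC's vector extensions, which makes them visible to the optimizers. A sketch of the replacement style, mirroring the new vcage_f32 below (illustrative name):

  /* Lane-wise |a| >= |b|, yielding the usual -1/0 mask.  */
  uint32x2_t
  cage_sketch (float32x2_t a, float32x2_t b)
  {
    return vabs_f32 (a) >= vabs_f32 (b);
  }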
@@ -18848,7 +18408,123 @@ vaddvq_f64 (float64x2_t __a)
return vgetq_lane_f64 (t, 0);
}
-/* vceq */
+/* vcage */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcages_f32 (float32_t __a, float32_t __b)
+{
+ return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcage_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return vabs_f32 (__a) >= vabs_f32 (__b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcageq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return vabsq_f32 (__a) >= vabsq_f32 (__b);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcaged_f64 (float64_t __a, float64_t __b)
+{
+ return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcageq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return vabsq_f64 (__a) >= vabsq_f64 (__b);
+}
+
+/* vcagt */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcagts_f32 (float32_t __a, float32_t __b)
+{
+ return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcagt_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return vabs_f32 (__a) > vabs_f32 (__b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcagtq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return vabsq_f32 (__a) > vabsq_f32 (__b);
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcagtd_f64 (float64_t __a, float64_t __b)
+{
+ return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcagtq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return vabsq_f64 (__a) > vabsq_f64 (__b);
+}
+
+/* vcale */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcale_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return vabs_f32 (__a) <= vabs_f32 (__b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcaleq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return vabsq_f32 (__a) <= vabsq_f32 (__b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcaleq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return vabsq_f64 (__a) <= vabsq_f64 (__b);
+}
+
+/* vcalt */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcalt_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return vabs_f32 (__a) < vabs_f32 (__b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcaltq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return vabsq_f32 (__a) < vabsq_f32 (__b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcaltq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return vabsq_f64 (__a) < vabsq_f64 (__b);
+}
+
+/* vceq - vector. */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vceq_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vceq_f64 (float64x1_t __a, float64x1_t __b)
+{
+ return __a == __b ? -1ll : 0ll;
+}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
@@ -18878,7 +18554,7 @@ vceq_s32 (int32x2_t __a, int32x2_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
+ return __a == __b ? -1ll : 0ll;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -18905,8 +18581,19 @@ vceq_u32 (uint32x2_t __a, uint32x2_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmeqdi ((int64x1_t) __a,
- (int64x1_t) __b);
+ return __a == __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vceqq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vceqq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -18968,25 +18655,243 @@ vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
(int64x2_t) __b);
}
+/* vceq - scalar. */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vceqs_f32 (float32_t __a, float32_t __b)
+{
+ return __a == __b ? -1 : 0;
+}
+
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqd_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
+ return __a == __b ? -1ll : 0ll;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
+ return __a == __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vceqd_f64 (float64_t __a, float64_t __b)
+{
+ return __a == __b ? -1ll : 0ll;
+}
+
+/* vceqz - vector. */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vceqz_f32 (float32x2_t __a)
+{
+ float32x2_t __b = {0.0f, 0.0f};
+ return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vceqz_f64 (float64x1_t __a)
+{
+ return __a == 0.0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vceqz_p8 (poly8x8_t __a)
+{
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
+ (int8x8_t) __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vceqz_s8 (int8x8_t __a)
+{
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vceqz_s16 (int16x4_t __a)
+{
+ int16x4_t __b = {0, 0, 0, 0};
+ return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vceqz_s32 (int32x2_t __a)
+{
+ int32x2_t __b = {0, 0};
+ return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vceqz_s64 (int64x1_t __a)
+{
+ return __a == 0ll ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vceqz_u8 (uint8x8_t __a)
+{
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
+ (int8x8_t) __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vceqz_u16 (uint16x4_t __a)
+{
+ uint16x4_t __b = {0, 0, 0, 0};
+ return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
+ (int16x4_t) __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vceqz_u32 (uint32x2_t __a)
+{
+ uint32x2_t __b = {0, 0};
+ return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
+ (int32x2_t) __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vceqz_u64 (uint64x1_t __a)
+{
+ return __a == 0ll ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vceqzq_f32 (float32x4_t __a)
+{
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
+ return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vceqzq_f64 (float64x2_t __a)
+{
+ float64x2_t __b = {0.0, 0.0};
+ return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vceqzq_p8 (poly8x16_t __a)
+{
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
+ (int8x16_t) __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vceqzq_s8 (int8x16_t __a)
+{
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vceqzq_s16 (int16x8_t __a)
+{
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vceqzq_s32 (int32x4_t __a)
+{
+ int32x4_t __b = {0, 0, 0, 0};
+ return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vceqzq_s64 (int64x2_t __a)
+{
+ int64x2_t __b = {0, 0};
+ return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vceqzq_u8 (uint8x16_t __a)
+{
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
+ (int8x16_t) __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vceqzq_u16 (uint16x8_t __a)
+{
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
+ (int16x8_t) __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vceqzq_u32 (uint32x4_t __a)
+{
+ uint32x4_t __b = {0, 0, 0, 0};
+ return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
+ (int32x4_t) __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vceqzq_u64 (uint64x2_t __a)
+{
+ uint64x2_t __b = {0, 0};
+ return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
+ (int64x2_t) __b);
+}
+
+/* vceqz - scalar. */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vceqzs_f32 (float32_t __a)
+{
+ return __a == 0.0f ? -1 : 0;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqzd_s64 (int64x1_t __a)
{
- return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, 0);
+ return __a == 0 ? -1ll : 0ll;
}
-/* vcge */
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vceqzd_u64 (int64x1_t __a)
+{
+ return __a == 0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vceqzd_f64 (float64_t __a)
+{
+ return __a == 0.0 ? -1ll : 0ll;
+}
+
+/* vcge - vector. */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcge_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcge_f64 (float64x1_t __a, float64x1_t __b)
+{
+ return __a >= __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcge_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
+ (int8x8_t) __b);
+}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_s8 (int8x8_t __a, int8x8_t __b)
@@ -19009,35 +18914,53 @@ vcge_s32 (int32x2_t __a, int32x2_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
+ return __a >= __b ? -1ll : 0ll;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __a,
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
(int8x8_t) __b);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __a,
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
(int16x4_t) __b);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __a,
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
(int32x2_t) __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
- (int64x1_t) __b);
+ return __a >= __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgeq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcgeq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
+ (int8x16_t) __b);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -19067,51 +18990,268 @@ vcgeq_s64 (int64x2_t __a, int64x2_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __a,
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
(int8x16_t) __b);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __a,
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
(int16x8_t) __b);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __a,
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
(int32x4_t) __b);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
{
- return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __a,
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
(int64x2_t) __b);
}
+/* vcge - scalar. */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcges_f32 (float32_t __a, float32_t __b)
+{
+ return __a >= __b ? -1 : 0;
+}
+
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcged_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
+ return __a >= __b ? -1ll : 0ll;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcged_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
- (int64x1_t) __b);
+ return __a >= __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcged_f64 (float64_t __a, float64_t __b)
+{
+ return __a >= __b ? -1ll : 0ll;
+}
+
+/* vcgez - vector. */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcgez_f32 (float32x2_t __a)
+{
+ float32x2_t __b = {0.0f, 0.0f};
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcgez_f64 (float64x1_t __a)
+{
+ return __a >= 0.0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcgez_p8 (poly8x8_t __a)
+{
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
+ (int8x8_t) __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcgez_s8 (int8x8_t __a)
+{
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgez_s16 (int16x4_t __a)
+{
+ int16x4_t __b = {0, 0, 0, 0};
+ return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcgez_s32 (int32x2_t __a)
+{
+ int32x2_t __b = {0, 0};
+ return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcgez_s64 (int64x1_t __a)
+{
+ return __a >= 0ll ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcgez_u8 (uint8x8_t __a)
+{
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
+ (int8x8_t) __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgez_u16 (uint16x4_t __a)
+{
+ uint16x4_t __b = {0, 0, 0, 0};
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
+ (int16x4_t) __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcgez_u32 (uint32x2_t __a)
+{
+ uint32x2_t __b = {0, 0};
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
+ (int32x2_t) __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcgez_u64 (uint64x1_t __a)
+{
+ return __a >= 0ll ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgezq_f32 (float32x4_t __a)
+{
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcgezq_f64 (float64x2_t __a)
+{
+ float64x2_t __b = {0.0, 0.0};
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgezq_p8 (poly8x16_t __a)
+{
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
+ (int8x16_t) __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgezq_s8 (int8x16_t __a)
+{
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgezq_s16 (int16x8_t __a)
+{
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgezq_s32 (int32x4_t __a)
+{
+ int32x4_t __b = {0, 0, 0, 0};
+ return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcgezq_s64 (int64x2_t __a)
+{
+ int64x2_t __b = {0, 0};
+ return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgezq_u8 (uint8x16_t __a)
+{
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
+ (int8x16_t) __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgezq_u16 (uint16x8_t __a)
+{
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
+ (int16x8_t) __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgezq_u32 (uint32x4_t __a)
+{
+ uint32x4_t __b = {0, 0, 0, 0};
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
+ (int32x4_t) __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcgezq_u64 (uint64x2_t __a)
+{
+ uint64x2_t __b = {0, 0};
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
+ (int64x2_t) __b);
+}
+
+/* vcgez - scalar. */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcgezs_f32 (float32_t __a)
+{
+ return __a >= 0.0f ? -1 : 0;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgezd_s64 (int64x1_t __a)
{
- return (uint64x1_t) __builtin_aarch64_cmgedi (__a, 0);
+ return __a >= 0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcgezd_u64 (int64x1_t __a)
+{
+ return __a >= 0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcgezd_f64 (float64_t __a)
+{
+ return __a >= 0.0 ? -1ll : 0ll;
+}
+
+/* vcgt - vector. */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcgt_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcgt_f64 (float64x1_t __a, float64x1_t __b)
+{
+ return __a > __b ? -1ll : 0ll;
}
-/* vcgt */
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
+ (int8x8_t) __b);
+}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_s8 (int8x8_t __a, int8x8_t __b)
@@ -19134,35 +19274,53 @@ vcgt_s32 (int32x2_t __a, int32x2_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
+ return __a > __b ? -1ll : 0ll;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __a,
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
(int8x8_t) __b);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __a,
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
(int16x4_t) __b);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __a,
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
(int32x2_t) __b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
- (int64x1_t) __b);
+ return __a > __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgtq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcgtq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
+ (int8x16_t) __b);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -19192,51 +19350,268 @@ vcgtq_s64 (int64x2_t __a, int64x2_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __a,
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
(int8x16_t) __b);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __a,
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
(int16x8_t) __b);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __a,
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
(int32x4_t) __b);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
{
- return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __a,
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
(int64x2_t) __b);
}
+/* vcgt - scalar. */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcgts_f32 (float32_t __a, float32_t __b)
+{
+ return __a > __b ? -1 : 0;
+}
+
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtd_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
+ return __a > __b ? -1ll : 0ll;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
- (int64x1_t) __b);
+ return __a > __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcgtd_f64 (float64_t __a, float64_t __b)
+{
+ return __a > __b ? -1ll : 0ll;
+}
+
+/* vcgtz - vector. */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcgtz_f32 (float32x2_t __a)
+{
+ float32x2_t __b = {0.0f, 0.0f};
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcgtz_f64 (float64x1_t __a)
+{
+ return __a > 0.0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcgtz_p8 (poly8x8_t __a)
+{
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
+ (int8x8_t) __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcgtz_s8 (int8x8_t __a)
+{
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgtz_s16 (int16x4_t __a)
+{
+ int16x4_t __b = {0, 0, 0, 0};
+ return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcgtz_s32 (int32x2_t __a)
+{
+ int32x2_t __b = {0, 0};
+ return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcgtz_s64 (int64x1_t __a)
+{
+ return __a > 0ll ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcgtz_u8 (uint8x8_t __a)
+{
+ uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
+ (int8x8_t) __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcgtz_u16 (uint16x4_t __a)
+{
+ uint16x4_t __b = {0, 0, 0, 0};
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
+ (int16x4_t) __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcgtz_u32 (uint32x2_t __a)
+{
+ uint32x2_t __b = {0, 0};
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
+ (int32x2_t) __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcgtz_u64 (uint64x1_t __a)
+{
+ return __a > 0ll ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgtzq_f32 (float32x4_t __a)
+{
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcgtzq_f64 (float64x2_t __a)
+{
+ float64x2_t __b = {0.0, 0.0};
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgtzq_p8 (poly8x16_t __a)
+{
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
+ (int8x16_t) __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgtzq_s8 (int8x16_t __a)
+{
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgtzq_s16 (int16x8_t __a)
+{
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgtzq_s32 (int32x4_t __a)
+{
+ int32x4_t __b = {0, 0, 0, 0};
+ return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcgtzq_s64 (int64x2_t __a)
+{
+ int64x2_t __b = {0, 0};
+ return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcgtzq_u8 (uint8x16_t __a)
+{
+ uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
+ (int8x16_t) __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcgtzq_u16 (uint16x8_t __a)
+{
+ uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
+ (int16x8_t) __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcgtzq_u32 (uint32x4_t __a)
+{
+ uint32x4_t __b = {0, 0, 0, 0};
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
+ (int32x4_t) __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcgtzq_u64 (uint64x2_t __a)
+{
+ uint64x2_t __b = {0, 0};
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
+ (int64x2_t) __b);
+}
+
+/* vcgtz - scalar. */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcgtzs_f32 (float32_t __a)
+{
+ return __a > 0.0f ? -1 : 0;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgtzd_s64 (int64x1_t __a)
{
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, 0);
+ return __a > 0 ? -1ll : 0ll;
}
-/* vcle */
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcgtzd_u64 (uint64x1_t __a)
+{
+ return __a > 0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcgtzd_f64 (float64_t __a)
+{
+ return __a > 0.0 ? -1ll : 0ll;
+}
+
+/* vcle - vector. */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcle_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcle_f64 (float64x1_t __a, float64x1_t __b)
+{
+ return __a <= __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcle_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
+ (int8x8_t) __a);
+}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_s8 (int8x8_t __a, int8x8_t __b)
@@ -19259,35 +19634,53 @@ vcle_s32 (int32x2_t __a, int32x2_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
+ return __a <= __b ? -1ll : 0ll;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __b,
+ return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
(int8x8_t) __a);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __b,
+ return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
(int16x4_t) __a);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __b,
+ return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
(int32x2_t) __a);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __b,
- (int64x1_t) __a);
+ return __a <= __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcleq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcleq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
+ (int8x16_t) __a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -19317,44 +19710,211 @@ vcleq_s64 (int64x2_t __a, int64x2_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __b,
+ return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
(int8x16_t) __a);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __b,
+ return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
(int16x8_t) __a);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __b,
+ return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
(int32x4_t) __a);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
{
- return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __b,
+ return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
(int64x2_t) __a);
}
+/* vcle - scalar. */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcles_f32 (float32_t __a, float32_t __b)
+{
+ return __a <= __b ? -1 : 0;
+}
+
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcled_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
+ return __a <= __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcled_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return __a <= __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcled_f64 (float64_t __a, float64_t __b)
+{
+ return __a <= __b ? -1ll : 0ll;
+}
+
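Like the vector variants above, these new scalar comparisons return an
all-ones mask on success and zero on failure.  A minimal usage sketch,
assuming an AArch64 target picking up this arm_neon.h (the function name
here is illustrative, not part of the patch):

#include <arm_neon.h>

/* Scalar compares yield all-ones (true) or zero (false) masks.  */
int
example_vcle (void)
{
  uint64_t le  = vcled_f64 (1.0, 2.0);   /* 0xffffffffffffffff.  */
  uint32_t nle = vcles_f32 (3.0f, 2.0f); /* 0: 3.0f <= 2.0f is false.  */
  return le != 0 && nle == 0;
}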
+/* vclez - vector. */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vclez_f32 (float32x2_t __a)
+{
+ float32x2_t __b = {0.0f, 0.0f};
+ return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vclez_f64 (float64x1_t __a)
+{
+ return __a <= 0.0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vclez_p8 (poly8x8_t __a)
+{
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
+ (int8x8_t) __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vclez_s8 (int8x8_t __a)
+{
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vclez_s16 (int16x4_t __a)
+{
+ int16x4_t __b = {0, 0, 0, 0};
+ return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vclez_s32 (int32x2_t __a)
+{
+ int32x2_t __b = {0, 0};
+ return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vclez_s64 (int64x1_t __a)
+{
+ return __a <= 0ll ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vclez_u64 (uint64x1_t __a)
+{
+ return __a <= 0ll ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vclezq_f32 (float32x4_t __a)
+{
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
+ return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vclezq_f64 (float64x2_t __a)
+{
+ float64x2_t __b = {0.0, 0.0};
+ return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vclezq_p8 (poly8x16_t __a)
+{
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
+ (int8x16_t) __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vclezq_s8 (int8x16_t __a)
+{
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vclezq_s16 (int16x8_t __a)
+{
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vclezq_s32 (int32x4_t __a)
+{
+ int32x4_t __b = {0, 0, 0, 0};
+ return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vclezq_s64 (int64x2_t __a)
+{
+ int64x2_t __b = {0, 0};
+ return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
+}
+
+/* vclez - scalar. */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vclezs_f32 (float32_t __a)
+{
+ return __a <= 0.0f ? -1 : 0;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclezd_s64 (int64x1_t __a)
{
- return (uint64x1_t) __builtin_aarch64_cmledi (__a, 0);
+ return __a <= 0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vclezd_u64 (uint64x1_t __a)
+{
+ return __a <= 0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vclezd_f64 (float64_t __a)
+{
+ return __a <= 0.0 ? -1ll : 0ll;
+}
+
+/* vclt - vector. */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vclt_f32 (float32x2_t __a, float32x2_t __b)
+{
+ return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vclt_f64 (float64x1_t __a, float64x1_t __b)
+{
+ return __a < __b ? -1ll : 0ll;
}
-/* vclt */
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vclt_p8 (poly8x8_t __a, poly8x8_t __b)
+{
+ return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
+ (int8x8_t) __a);
+}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_s8 (int8x8_t __a, int8x8_t __b)
@@ -19377,35 +19937,53 @@ vclt_s32 (int32x2_t __a, int32x2_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
+ return __a < __b ? -1ll : 0ll;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __b,
+ return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
(int8x8_t) __a);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __b,
+ return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
(int16x4_t) __a);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __b,
+ return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
(int32x2_t) __a);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __b,
- (int64x1_t) __a);
+ return __a < __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcltq_f32 (float32x4_t __a, float32x4_t __b)
+{
+ return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcltq_f64 (float64x2_t __a, float64x2_t __b)
+{
+ return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
+{
+ return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
+ (int8x16_t) __a);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -19435,41 +20013,183 @@ vcltq_s64 (int64x2_t __a, int64x2_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __b,
+ return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
(int8x16_t) __a);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __b,
+ return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
(int16x8_t) __a);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __b,
+ return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
(int32x4_t) __a);
}
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
{
- return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __b,
+ return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
(int64x2_t) __a);
}
+/* vclt - scalar. */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vclts_f32 (float32_t __a, float32_t __b)
+{
+ return __a < __b ? -1 : 0;
+}
+
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltd_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
+ return __a < __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
+{
+ return __a < __b ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcltd_f64 (float64_t __a, float64_t __b)
+{
+ return __a < __b ? -1ll : 0ll;
+}
+
+/* vcltz - vector. */
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcltz_f32 (float32x2_t __a)
+{
+ float32x2_t __b = {0.0f, 0.0f};
+ return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcltz_f64 (float64x1_t __a)
+{
+ return __a < 0.0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcltz_p8 (poly8x8_t __a)
+{
+ poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
+ (int8x8_t) __b);
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vcltz_s8 (int8x8_t __a)
+{
+ int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vcltz_s16 (int16x4_t __a)
+{
+ int16x4_t __b = {0, 0, 0, 0};
+ return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vcltz_s32 (int32x2_t __a)
+{
+ int32x2_t __b = {0, 0};
+ return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcltz_s64 (int64x1_t __a)
+{
+ return __a < 0ll ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcltzq_f32 (float32x4_t __a)
+{
+ float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
+ return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcltzq_f64 (float64x2_t __a)
+{
+ float64x2_t __b = {0.0, 0.0};
+ return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcltzq_p8 (poly8x16_t __a)
+{
+ poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
+ (int8x16_t) __b);
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vcltzq_s8 (int8x16_t __a)
+{
+ int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vcltzq_s16 (int16x8_t __a)
+{
+ int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
+ return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vcltzq_s32 (int32x4_t __a)
+{
+ int32x4_t __b = {0, 0, 0, 0};
+ return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vcltzq_s64 (int64x2_t __a)
+{
+ int64x2_t __b = {0, 0};
+ return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
+}
+
+/* vcltz - scalar. */
+
+__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
+vcltzs_f32 (float32_t __a)
+{
+ return __a < 0.0f ? -1 : 0;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcltzd_s64 (int64x1_t __a)
{
- return (uint64x1_t) __builtin_aarch64_cmltdi (__a, 0);
+ return __a < 0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vcltzd_u64 (uint64x1_t __a)
+{
+ return __a < 0 ? -1ll : 0ll;
+}
+
+__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
+vcltzd_f64 (float64_t __a)
+{
+ return __a < 0.0 ? -1ll : 0ll;
}
/* vcvt (double -> float). */
@@ -24953,7 +25673,7 @@ vtst_s32 (int32x2_t __a, int32x2_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
+ return (__a & __b) ? -1ll : 0ll;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -24980,8 +25700,7 @@ vtst_u32 (uint32x2_t __a, uint32x2_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
- (int64x1_t) __b);
+ return (__a & __b) ? -1ll : 0ll;
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -25039,14 +25758,13 @@ vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtstd_s64 (int64x1_t __a, int64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
+ return (__a & __b) ? -1ll : 0ll;
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
{
- return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
- (int64x1_t) __b);
+ return (__a & __b) ? -1ll : 0ll;
}
/* vuqadd */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index d774c4c8cbc..00e315d920c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -83,6 +83,9 @@
;; Vector Float modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF])
+;; All Float modes.
+(define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
+
;; Vector Float modes with 2 elements.
(define_mode_iterator V2F [V2SF V2DF])
@@ -213,13 +216,6 @@
UNSPEC_URSHL ; Used in aarch64-simd.md.
UNSPEC_SQRSHL ; Used in aarch64-simd.md.
UNSPEC_UQRSHL ; Used in aarch64-simd.md.
- UNSPEC_CMEQ ; Used in aarch64-simd.md.
- UNSPEC_CMLE ; Used in aarch64-simd.md.
- UNSPEC_CMLT ; Used in aarch64-simd.md.
- UNSPEC_CMGE ; Used in aarch64-simd.md.
- UNSPEC_CMGT ; Used in aarch64-simd.md.
- UNSPEC_CMHS ; Used in aarch64-simd.md.
- UNSPEC_CMHI ; Used in aarch64-simd.md.
UNSPEC_SSLI ; Used in aarch64-simd.md.
UNSPEC_USLI ; Used in aarch64-simd.md.
UNSPEC_SSRI ; Used in aarch64-simd.md.
@@ -227,7 +223,6 @@
UNSPEC_SSHLL ; Used in aarch64-simd.md.
UNSPEC_USHLL ; Used in aarch64-simd.md.
UNSPEC_ADDP ; Used in aarch64-simd.md.
- UNSPEC_CMTST ; Used in aarch64-simd.md.
UNSPEC_FMAX ; Used in aarch64-simd.md.
UNSPEC_FMIN ; Used in aarch64-simd.md.
UNSPEC_TBL ; Used in vector permute patterns.
@@ -253,6 +248,7 @@
;; For scalar usage of vector/FP registers
(define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d")
+ (SF "s") (DF "d")
(V8QI "") (V16QI "")
(V4HI "") (V8HI "")
(V2SI "") (V4SI "")
@@ -307,7 +303,8 @@
(V4SF ".4s") (V2DF ".2d")
(DI "") (SI "")
(HI "") (QI "")
- (TI "")])
+ (TI "") (SF "")
+ (DF "")])
;; Register suffix narrowed modes for VQN.
(define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h")
@@ -446,7 +443,8 @@
(V2SI "V2SI") (V4SI "V4SI")
(DI "DI") (V2DI "V2DI")
(V2SF "V2SI") (V4SF "V4SI")
- (V2DF "V2DI")])
+ (V2DF "V2DI") (DF "DI")
+ (SF "SI")])
;; Lower case mode of results of comparison operations.
(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
@@ -454,7 +452,8 @@
(V2SI "v2si") (V4SI "v4si")
(DI "di") (V2DI "v2di")
(V2SF "v2si") (V4SF "v4si")
- (V2DF "v2di")])
+ (V2DF "v2di") (DF "di")
+ (SF "si")])
;; Vm for lane instructions is restricted to FP_LO_REGS.
(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
@@ -548,6 +547,15 @@
;; Code iterator for signed variants of vector saturating binary ops.
(define_code_iterator SBINQOPS [ss_plus ss_minus])
+;; Comparison operators for <F>CM.
+(define_code_iterator COMPARISONS [lt le eq ge gt])
+
+;; Unsigned comparison operators.
+(define_code_iterator UCOMPARISONS [ltu leu geu gtu])
+
+;; Comparison operators for FAC (floating-point absolute compare) instructions.
+(define_code_iterator FAC_COMPARISONS [lt le ge gt])
+
;; -------------------------------------------------------------------
;; Code Attributes
;; -------------------------------------------------------------------
@@ -580,7 +588,28 @@
(eq "eq")
(ne "ne")
(lt "lt")
- (ge "ge")])
+ (ge "ge")
+ (le "le")
+ (gt "gt")
+ (ltu "ltu")
+ (leu "leu")
+ (geu "geu")
+ (gtu "gtu")])
+
+;; For comparison operators we use the FCM* and CM* instructions.
+;; As there are no CMLE or CMLT instructions which act on 3 vector
+;; operands, we must use CMGE or CMGT and swap the order of the
+;; source operands.
+
+(define_code_attr n_optab [(lt "gt") (le "ge") (eq "eq") (ge "ge") (gt "gt")
+ (ltu "hi") (leu "hs") (geu "hs") (gtu "hi")])
+(define_code_attr cmp_1 [(lt "2") (le "2") (eq "1") (ge "1") (gt "1")
+ (ltu "2") (leu "2") (geu "1") (gtu "1")])
+(define_code_attr cmp_2 [(lt "1") (le "1") (eq "2") (ge "2") (gt "2")
+ (ltu "1") (leu "1") (geu "2") (gtu "2")])
+
+(define_code_attr CMP [(lt "LT") (le "LE") (eq "EQ") (ge "GE") (gt "GT")
+ (ltu "LTU") (leu "LEU") (geu "GEU") (gtu "GTU")])
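A worked expansion of these attributes (a sketch only; the actual output
templates live in aarch64-simd.md):

;; For (lt a b):  <n_optab> = "gt", <cmp_1> = "2", <cmp_2> = "1", so a
;; template of the shape "cm<n_optab>\t%0, %<cmp_1>, %<cmp_2>" expands
;; to "cmgt\t%0, %2, %1" -- CMGT with the source operands swapped.
;; For (geu a b): <n_optab> = "hs" and the operands keep their order,
;; giving "cmhs\t%0, %1, %2".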
(define_code_attr fix_trunc_optab [(fix "fix_trunc")
(unsigned_fix "fixuns_trunc")])
@@ -693,11 +722,6 @@
UNSPEC_SQSHRN UNSPEC_UQSHRN
UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
-(define_int_iterator VCMP_S [UNSPEC_CMEQ UNSPEC_CMGE UNSPEC_CMGT
- UNSPEC_CMLE UNSPEC_CMLT])
-
-(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
-
(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
UNSPEC_TRN1 UNSPEC_TRN2
UNSPEC_UZP1 UNSPEC_UZP2])
@@ -784,12 +808,6 @@
(UNSPEC_RADDHN2 "add")
(UNSPEC_RSUBHN2 "sub")])
-(define_int_attr cmp [(UNSPEC_CMGE "ge") (UNSPEC_CMGT "gt")
- (UNSPEC_CMLE "le") (UNSPEC_CMLT "lt")
- (UNSPEC_CMEQ "eq")
- (UNSPEC_CMHS "hs") (UNSPEC_CMHI "hi")
- (UNSPEC_CMTST "tst")])
-
(define_int_attr offsetlr [(UNSPEC_SSLI "1") (UNSPEC_USLI "1")
(UNSPEC_SSRI "0") (UNSPEC_USRI "0")])
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 8f80b202811..8514e8f8fbd 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -31,6 +31,11 @@
(ior (match_operand 0 "register_operand")
(match_test "op == const0_rtx"))))
+(define_predicate "aarch64_reg_or_fp_zero"
+ (and (match_code "reg,subreg,const_double")
+ (ior (match_operand 0 "register_operand")
+ (match_test "aarch64_float_const_zero_rtx_p (op)"))))
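This predicate accepts either a register or a literal floating-point +0.0
(via aarch64_float_const_zero_rtx_p), which is what lets the new
floating-point compare patterns match the immediate-zero FCM forms used by
the vcgtz/vcltz-style intrinsics above instead of forcing the zero into a
register.  An operand sketch using the VALLF iterator added in iterators.md
(illustrative; constraint alternatives omitted):

  (match_operand:VALLF 2 "aarch64_reg_or_fp_zero")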
+
(define_predicate "aarch64_reg_zero_or_m1_or_1"
(and (match_code "reg,subreg,const_int")
(ior (match_operand 0 "register_operand")