Diffstat (limited to 'gcc/config/arm/neon.md')
 gcc/config/arm/neon.md | 688 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 688 insertions(+), 0 deletions(-)
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
new file mode 100644
index 00000000000..ab4c88a6854
--- /dev/null
+++ b/gcc/config/arm/neon.md
@@ -0,0 +1,688 @@
+;; ARM NEON coprocessor Machine Description
+;; Copyright (C) 2006 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to the Free
+;; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+;; 02110-1301, USA.
+
+;; Constants for unspecs.
+(define_constants
+ [(UNSPEC_VEXT 64)
+ (UNSPEC_VPADD 65)
+ (UNSPEC_VPSMIN 66)
+ (UNSPEC_VPUMIN 67)
+ (UNSPEC_VPSMAX 68)
+ (UNSPEC_VPUMAX 69)
+ (UNSPEC_ASHIFT_SIGNED 70)
+ (UNSPEC_ASHIFT_UNSIGNED 71)])
+
+;; Double-width vector modes.
+(define_mode_macro VD [V8QI V4HI V2SI V2SF])
+
+;; Same, without floating-point elements.
+(define_mode_macro VDI [V8QI V4HI V2SI])
+
+;; Quad-width vector modes.
+(define_mode_macro VQ [V16QI V8HI V4SI V4SF])
+
+;; Same, without floating-point elements.
+(define_mode_macro VQI [V16QI V8HI V4SI])
+
+;; Widenable modes.
+(define_mode_macro VW [V8QI V4HI V2SI])
+
+;; Narrowable modes.
+(define_mode_macro VN [V8HI V4SI V2DI])
+
+;; All supported vector modes.
+(define_mode_macro VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
+
+;; Supported integer vector modes.
+(define_mode_macro VDQI [V8QI V16QI V4HI V8HI V2SI V4SI])
+
+;; Define element mode for each vector mode.
+(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
+ (V4HI "HI") (V8HI "HI")
+ (V2SI "SI") (V4SI "SI")
+ (V2SF "SF") (V4SF "SF")])
+
+;; Register width from element mode
+(define_mode_attr V_reg [(V8QI "P") (V16QI "q")
+ (V4HI "P") (V8HI "q")
+ (V2SI "P") (V4SI "q")
+ (V2SF "P") (V4SF "q")])
+
+;; Wider modes with the same number of elements.
+(define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")])
+
+;; Narrower modes with the same number of elements.
+(define_mode_attr V_narrow [(V8HI "V8QI") (V4SI "V4HI") (V2DI "V2SI")])
+
+;; Get element type from double-width mode, for operations where we don't care
+;; about signedness.
+(define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8")
+ (V4HI "i16") (V8HI "i16")
+ (V2SI "i32") (V4SI "i32")
+ (V2SF "f32") (V4SF "f32")])
+
+;; Same, but for operations which work on signed values.
+(define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8")
+ (V4HI "s16") (V8HI "s16")
+ (V2SI "s32") (V4SI "s32")
+ (V2SF "f32") (V4SF "f32")])
+
+;; Same, but for operations which work on unsigned values.
+(define_mode_attr V_u_elem [(V8QI "u8") (V16QI "u8")
+ (V4HI "u16") (V8HI "u16")
+ (V2SI "u32") (V4SI "u32")])
+
+;; Element types for extraction of unsigned scalars.
+(define_mode_attr VD_uf_sclr [(V8QI "u8") (V4HI "u16") (V2SI "32") (V2SF "32")])
+
+;; Element sizes for duplicating ARM registers to all elements of a vector.
+(define_mode_attr VD_dup [(V8QI "8") (V4HI "16") (V2SI "32") (V2SF "32")])
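+
+;; As an illustration of how these mode attributes combine (register numbers
+;; below are arbitrary, not part of any pattern): for V4SI, <V_elem> is SI,
+;; <V_reg> is "q" and <V_if_elem> is "i32", so a template such as
+;;   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+;; prints as
+;;   vadd.i32  q0, q1, q2
+;; while the same template for V2SI (where <V_reg> is "P") prints a
+;; doubleword form such as
+;;   vadd.i32  d0, d1, d2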
+
+;; FIXME: Attributes are probably borked.
+(define_insn "*neon_mov<mode>"
+ [(set (match_operand:VD 0 "nonimmediate_operand"
+ "=w,Uv,w, w, ?r,?w,?r,?r, ?Us")
+ (match_operand:VD 1 "general_operand"
+ " w,w, Dn,Uvi, w, r, r, Usi,r"))]
+ "TARGET_NEON"
+{
+ if (which_alternative == 2)
+ {
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
+ &operands[1], &width);
+
+ gcc_assert (is_valid != 0);
+
+ if (width == 0)
+ return "vmov.f32\t%P0, %1 @ <mode>";
+ else
+ sprintf (templ, "vmov.i%d\t%%P0, %%1 @ <mode>", width);
+
+ return templ;
+ }
+
+ /* FIXME: If the memory layout is changed in big-endian mode, output_move_vfp
+ below must be changed to output_move_neon (which will use the
+ element/structure loads/stores), and the constraint changed to 'Un' instead
+ of 'Uv'. */
+
+ switch (which_alternative)
+ {
+ case 0: return "vmov\t%P0, %P1 @ <mode>";
+ case 1: case 3: return output_move_vfp (operands);
+ case 2: gcc_unreachable ();
+ case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
+ case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
+ default: return output_move_double (operands);
+ }
+}
+ [(set_attr "type" "farith,f_stored,farith,f_loadd,f_2_r,r_2_f,*,load2,store2")
+ (set_attr "length" "4,4,4,4,4,4,8,8,8")
+ (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")
+ (set_attr "predicable" "no")])
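+
+;; For the immediate alternative of the pattern above, a rough sketch of the
+;; possible output (the exact operand rewriting is done by
+;; neon_immediate_valid_for_move): a replicated-byte constant might be
+;; emitted as
+;;   vmov.i8   d0, #5       @ width == 8
+;; while a constant only representable as a float pattern would use
+;;   vmov.f32  d0, #1.0     @ width == 0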
+
+(define_insn "*neon_mov<mode>"
+ [(set (match_operand:VQ 0 "nonimmediate_operand"
+ "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
+ (match_operand:VQ 1 "general_operand"
+ " w,w, Dn,Uni, w, r, r, Usi, r"))]
+ "TARGET_NEON"
+{
+ if (which_alternative == 2)
+ {
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
+ &operands[1], &width);
+
+ gcc_assert (is_valid != 0);
+
+ if (width == 0)
+ return "vmov.f32\t%q0, %1 @ <mode>";
+ else
+ sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
+
+ return templ;
+ }
+
+ switch (which_alternative)
+ {
+ case 0: return "vmov\t%q0, %q1 @ <mode>";
+ case 1: case 3: return output_move_neon (operands);
+ case 2: gcc_unreachable ();
+ case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
+ case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
+ default: return output_move_quad (operands);
+ }
+}
+ [(set_attr "type" "farith,f_stored,farith,f_loadd,f_2_r,r_2_f,*,load2,store2")
+ (set_attr "length" "4,8,4,8,8,8,16,8,16")
+ (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")
+ (set_attr "predicable" "no")])
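+
+;; For the core-register alternatives of the quad-word pattern above, a move
+;; between a q register and the ARM core takes two doubleword transfers; e.g.
+;; alternative 4 prints roughly as (register numbers illustrative only):
+;;   vmov  r0, r1, d2       @ low half
+;;   vmov  r2, r3, d3       @ high half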
+
+; FIXME: Set/extract/init quads.
+
+(define_insn "vec_set<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "+w")
+ (vec_merge:VD
+ (match_operand:VD 1 "s_register_operand" "0")
+ (vec_duplicate:VD
+ (match_operand:<V_elem> 2 "s_register_operand" "r"))
+ (ashift:SI (const_int 1)
+ (match_operand:SI 3 "immediate_operand" "i"))))]
+ "TARGET_NEON"
+ "vmov.<VD_uf_sclr>\t%P0[%n3], %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "vec_extract<mode>"
+ [(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
+ (vec_select:<V_elem>
+ (match_operand:VD 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "TARGET_NEON"
+ "vmov.<VD_uf_sclr>\t%0, %P1[%n2]"
+ [(set_attr "predicable" "yes")])
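+
+;; For example (lane and register numbers illustrative only), inserting into
+;; and extracting from a 16-bit lane of a V4HI value use lane-indexed moves
+;; of the form
+;;   vmov.u16  d0[<lane>], r1    @ vec_set
+;;   vmov.u16  r1, d0[<lane>]    @ vec_extract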
+
+(define_expand "vec_init<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_NEON"
+{
+ neon_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+;; Doubleword and quadword arithmetic.
+
+;; NOTE: vadd/vsub and some other instructions also support 64-bit integer
+;; element size, which we could potentially use for "long long" operations. We
+;; don't want to do this at present though, because moving values from the
+;; vector unit to the ARM core is currently slow and 64-bit addition (etc.) is
+;; easy to do with ARM instructions anyway.
+
+(define_insn "*add<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*sub<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*mul<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
+ (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
+ (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
+ case 1: return neon_output_logic_immediate ("vorr", &operands[2],
+ <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "predicable" "no")])
+
+;; The concrete forms of the NEON immediate-logic instructions are vbic and
+;; vorr; there is no true immediate vand. We support an immediate alternative
+;; for and<mode>3 anyway, because an AND is the canonical form the middle-end
+;; expects to use for immediate bitwise masking.
+
+(define_insn "and<mode>3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
+ (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
+ (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
+ case 1: return neon_output_logic_immediate ("vand", &operands[2],
+ <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "predicable" "no")])
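+
+;; A rough sketch of what the immediate alternatives above can produce (the
+;; exact text comes from neon_output_logic_immediate): an AND with a constant
+;; whose complement is a representable NEON immediate is a bit-clear, e.g.
+;; for V4SI something like
+;;   vbic.i32  q0, #255     @ clear the low byte of each element
+;; and ior<mode>3 can likewise use vorr with an immediate.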
+
+(define_insn "*orn<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON"
+ "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*bic<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON"
+ "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmvn\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "predicable" "no")])
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (abs:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (neg:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*umin<mode>3_neon"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (umin:VDQI (match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*umax<mode>3_neon"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (umax:VDQI (match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*smin<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (smin:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*smax<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (smax:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "ashl<mode>3"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (ashift:VDQI (match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+; Used for implementing arithmetic shift-right, which is a left-shift by a
+; negative amount, with signed operands. This is essentially the same as
+; ashl<mode>3 above, but using an unspec in case GCC tries anything tricky
+; with negative shift amounts.
+
+(define_insn "ashl<mode>3_signed"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")]
+ UNSPEC_ASHIFT_SIGNED))]
+ "TARGET_NEON"
+ "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+; Used for implementing logical shift-right, which is a left-shift by a negative
+; amount, with unsigned operands.
+
+(define_insn "ashl<mode>3_unsigned"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")]
+ UNSPEC_ASHIFT_UNSIGNED))]
+ "TARGET_NEON"
+ "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_expand "ashr<mode>3"
+ [(set (match_operand:VDQI 0 "s_register_operand" "")
+ (ashiftrt:VDQI (match_operand:VDQI 1 "s_register_operand" "")
+ (match_operand:VDQI 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
+
+ DONE;
+})
+
+(define_expand "lshr<mode>3"
+ [(set (match_operand:VDQI 0 "s_register_operand" "")
+ (lshiftrt:VDQI (match_operand:VDQI 1 "s_register_operand" "")
+ (match_operand:VDQI 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
+
+ DONE;
+})
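+
+;; For example, a logical shift right of a V4SI vector by a vector of counts
+;; expands through the patterns above to a negation of the counts followed by
+;; an unsigned vshl (register numbers illustrative only):
+;;   vneg.s32  q2, q1       @ negate the shift amounts
+;;   vshl.u32  q0, q3, q2   @ shift each element of q3 right by its amount
+;; and ashr<mode>3 does the same using the signed vshl form.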
+
+;; Widening operations
+
+;; FIXME: I'm not sure if sign/zero_extend are legal to use on vector modes.
+
+(define_insn "widen_ssum<mode>3"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (plus:<V_widen> (sign_extend:<V_widen>
+ (match_operand:VW 1 "s_register_operand" "%w"))
+ (match_operand:<V_widen> 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vaddw.<V_s_elem>\t%q0, %q2, %P1"
+ [(set_attr "predicable" "no")])
+
+(define_insn "widen_usum<mode>3"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (plus:<V_widen> (zero_extend:<V_widen>
+ (match_operand:VW 1 "s_register_operand" "%w"))
+ (match_operand:<V_widen> 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vaddw.<V_u_elem>\t%q0, %q2, %P1"
+ [(set_attr "predicable" "no")])
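+
+;; For example, widen_ssumv8qi3 accumulates eight sign-extended bytes into
+;; eight halfword sums with a single instruction:
+;;   vaddw.s8  q0, q1, d2   @ q0 = q1 + sign_extend (d2), element-wise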
+
+;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
+;; shift-count granularity. That's good enough for the middle-end's current
+;; needs.
+
+(define_expand "vec_shr_<mode>"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand:VDQ 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_multiple_of_8_operand" "")]
+ "TARGET_NEON"
+{
+ rtx zero_reg;
+ HOST_WIDE_INT num_bits = INTVAL (operands[2]);
+ const int width = GET_MODE_BITSIZE (<MODE>mode);
+ const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
+ rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
+ (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
+
+ if (num_bits == width)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+
+ zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
+ operands[0] = gen_lowpart (bvecmode, operands[0]);
+ operands[1] = gen_lowpart (bvecmode, operands[1]);
+
+ emit_insn (gen_ext (operands[0], operands[1], zero_reg,
+ GEN_INT (num_bits / BITS_PER_UNIT)));
+ DONE;
+})
+
+(define_expand "vec_shl_<mode>"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand:VDQ 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_multiple_of_8_operand" "")]
+ "TARGET_NEON"
+{
+ rtx zero_reg;
+ HOST_WIDE_INT num_bits = INTVAL (operands[2]);
+ const int width = GET_MODE_BITSIZE (<MODE>mode);
+ const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
+ rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
+ (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
+
+ if (num_bits == 0)
+ {
+ emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
+ DONE;
+ }
+
+ num_bits = width - num_bits;
+
+ zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
+ operands[0] = gen_lowpart (bvecmode, operands[0]);
+ operands[1] = gen_lowpart (bvecmode, operands[1]);
+
+ emit_insn (gen_ext (operands[0], zero_reg, operands[1],
+ GEN_INT (num_bits / BITS_PER_UNIT)));
+ DONE;
+})
+
+(define_insn "neon_vextv8qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
+ (match_operand:V8QI 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VEXT))]
+ "TARGET_NEON"
+ "vext.8\t%P0, %P1, %P2, %3"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neon_vextv16qi"
+ [(set (match_operand:V16QI 0 "s_register_operand" "=w")
+ (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
+ (match_operand:V16QI 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VEXT))]
+ "TARGET_NEON"
+ "vext.8\t%q0, %q1, %q2, %3"
+ [(set_attr "predicable" "no")])
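+
+;; For instance, vec_shr_v8qi of a doubleword vector by 16 bits zeroes a
+;; scratch register and extracts starting two bytes in, roughly:
+;;   vmov.i8  d2, #0        @ zero_reg (exact form chosen by force_reg)
+;;   vext.8   d0, d1, d2, #2
+;; i.e. the vector shifted down by two byte positions with zero fill.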
+
+;; Reduction operations
+
+; Pairwise addition has wraparound semantics, so the same sequence works for
+; both signed and unsigned sums: we don't need to define reduc_splus_<mode>
+; as well as reduc_uplus_<mode>.
+
+(define_expand "reduc_uplus_<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpadd<mode>);
+ DONE;
+})
+
+(define_expand "reduc_smin_<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpsmin<mode>);
+ DONE;
+})
+
+(define_expand "reduc_smax_<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpsmax<mode>);
+ DONE;
+})
+
+(define_expand "reduc_umin_<mode>"
+ [(match_operand:VDI 0 "s_register_operand" "")
+ (match_operand:VDI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpumin<mode>);
+ DONE;
+})
+
+(define_expand "reduc_umax_<mode>"
+ [(match_operand:VDI 0 "s_register_operand" "")
+ (match_operand:VDI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpumax<mode>);
+ DONE;
+})
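+
+;; A sketch of what neon_pairwise_reduce is assumed to emit: log2(n) pairwise
+;; steps, leaving the reduction in (at least) element 0. For reduc_uplus_v4hi
+;; with d1 = {a, b, c, d} this would be roughly
+;;   vpadd.i16  d0, d1, d1   @ d0 = {a+b, c+d, a+b, c+d}
+;;   vpadd.i16  d0, d0, d0   @ d0 = {a+b+c+d, ...}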
+
+(define_insn "neon_vpadd<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPADD))]
+ "TARGET_NEON"
+ "vpadd.<V_if_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neon_vpsmin<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPSMIN))]
+ "TARGET_NEON"
+ "vpmin.<V_s_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neon_vpsmax<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPSMAX))]
+ "TARGET_NEON"
+ "vpmax.<V_s_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neon_vpumin<mode>"
+ [(set (match_operand:VDI 0 "s_register_operand" "=w")
+ (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")]
+ UNSPEC_VPUMIN))]
+ "TARGET_NEON"
+ "vpmin.<V_u_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neon_vpumax<mode>"
+ [(set (match_operand:VDI 0 "s_register_operand" "=w")
+ (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")]
+ UNSPEC_VPUMAX))]
+ "TARGET_NEON"
+ "vpmax.<V_u_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+;; Saturating arithmetic
+
+; NOTE: Neon supports many more saturating variants of instructions than the
+; following, but these are all that GCC currently understands.
+; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
+; yet either, although these patterns may be used by intrinsics when they're
+; added.
+
+(define_insn "*ss_add<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqadd.<V_s_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*us_add<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqadd.<V_u_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*ss_sub<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqsub.<V_s_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*us_sub<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqsub.<V_u_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
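+
+;; For reference, the saturating forms clamp rather than wrap; e.g. for
+;; *ss_addv8qi_neon:
+;;   vqadd.s8  d0, d1, d2    @ 100 + 100 -> 127, not -56
+;; and the unsigned variants saturate at 0 and at the type's maximum.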
+
+; FIXME: These instructions aren't supported in GCC 4.1, but are documented
+; for the current trunk. Uncomment when this code is merged to a GCC version
+; which supports them.
+
+;(define_insn "*ss_neg<mode>_neon"
+; [(set (match_operand:VD 0 "s_register_operand" "=w")
+;        (ss_neg:VD (match_operand:VD 1 "s_register_operand" "w")))]
+; "TARGET_NEON"
+; "vqneg.<V_s_elem>\t%P0, %P1"
+; [(set_attr "predicable" "no")])
+
+;(define_insn "*ss_ashift<mode>_neon"
+; [(set (match_operand:VD 0 "s_register_operand" "=w")
+; (ss_ashift:VD (match_operand:VD 1 "s_register_operand" "w")
+; (match_operand:VD 2 "s_register_operand" "w")))]
+; "TARGET_NEON"
+; "vqshl.<V_s_elem>\t%P0, %P1, %P2"
+; [(set_attr "predicable" "no")])