author     Julian Brown <julian@codesourcery.com>  2006-09-06 14:57:08 +0000
committer  Julian Brown <julian@codesourcery.com>  2006-09-06 14:57:08 +0000
commit     01cde33addb17e5282dca60cb66784abf9290c5e (patch)
tree       847ab9ebcbecff401109d92c70671619486b3d52
parent     c76ea46f3013e8114a053960b4ecdefdc952f34c (diff)

Merge ARM NEON support from CodeSourcery internal arm-neon-4_1 branch
revision 148395. (csl/sourcerygxx/4.1-18)

git-svn-id: https://gcc.gnu.org/svn/gcc/branches/csl/sourcerygxx-4_1@116723 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--  ChangeLog.csl                 |    5
-rw-r--r--  gcc/config.gcc                |    2
-rw-r--r--  gcc/config/arm/aof.h          |   32
-rw-r--r--  gcc/config/arm/aout.h         |   28
-rw-r--r--  gcc/config/arm/arm-protos.h   |   12
-rw-r--r--  gcc/config/arm/arm.c          | 1061
-rw-r--r--  gcc/config/arm/arm.h          |  210
-rw-r--r--  gcc/config/arm/arm.md         |   64
-rw-r--r--  gcc/config/arm/arm.opt        |    4
-rw-r--r--  gcc/config/arm/iwmmxt.md      |  245
-rw-r--r--  gcc/config/arm/neon.md        |  688
-rw-r--r--  gcc/config/arm/predicates.md  |   34
-rw-r--r--  gcc/config/arm/t-arm          |    2
-rw-r--r--  gcc/config/arm/vec-common.md  |  101
-rw-r--r--  gcc/config/arm/vfp.md         |  220
15 files changed, 2231 insertions, 477 deletions
diff --git a/ChangeLog.csl b/ChangeLog.csl
index 9edc73e8c58..6085bd820d3 100644
--- a/ChangeLog.csl
+++ b/ChangeLog.csl
@@ -1,3 +1,8 @@
+2006-09-06 Julian Brown <julian@codesourcery.com>
+
+ Merge ARM NEON support from CodeSourcery internal arm-neon-4_1 branch
+ revision 148395.
+
2006-09-02 Joseph Myers <joseph@codesourcery.com>
Backport:
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 233903eaac5..1d8f2cd1c71 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -2575,7 +2575,7 @@ case "${target}" in
case "$with_fpu" in
"" \
- | fpa | fpe2 | fpe3 | maverick | vfp )
+ | fpa | fpe2 | fpe3 | maverick | vfp | vfp3 | neon )
# OK
;;
*)
diff --git a/gcc/config/arm/aof.h b/gcc/config/arm/aof.h
index 9f2ddfd633f..ac02ec155ca 100644
--- a/gcc/config/arm/aof.h
+++ b/gcc/config/arm/aof.h
@@ -250,7 +250,11 @@ do { \
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", \
"s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", \
"s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", \
- "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", \
+ "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", \
+ "d16", "?16", "d17", "?17", "d18", "?18", "d19", "?19", \
+ "d20", "?20", "d21", "?21", "d22", "?22", "d23", "?23", \
+ "d24", "?24", "d25", "?25", "d26", "?26", "d27", "?27", \
+ "d28", "?28", "d29", "?29", "d30", "?30", "d31", "?31", \
"vfpcc" \
}
@@ -272,22 +276,30 @@ do { \
{"r13", 13}, {"sp", 13}, \
{"r14", 14}, {"lr", 14}, \
{"r15", 15}, {"pc", 15}, \
- {"d0", 63}, \
+ {"d0", 63}, {"q0", 63}, \
{"d1", 65}, \
- {"d2", 67}, \
+ {"d2", 67}, {"q1", 67}, \
{"d3", 69}, \
- {"d4", 71}, \
+ {"d4", 71}, {"q2", 71}, \
{"d5", 73}, \
- {"d6", 75}, \
+ {"d6", 75}, {"q3", 75}, \
{"d7", 77}, \
- {"d8", 79}, \
+ {"d8", 79}, {"q4", 79}, \
{"d9", 81}, \
- {"d10", 83}, \
+ {"d10", 83}, {"q5", 83}, \
{"d11", 85}, \
- {"d12", 87}, \
+ {"d12", 87}, {"q6", 87}, \
{"d13", 89}, \
- {"d14", 91}, \
- {"d15", 93} \
+ {"d14", 91}, {"q7", 91}, \
+ {"d15", 93}, \
+ {"q8", 95}, \
+ {"q9", 99}, \
+ {"q10", 103}, \
+ {"q11", 107}, \
+ {"q12", 111}, \
+ {"q13", 115}, \
+ {"q14", 119}, \
+ {"q15", 123} \
}
#define REGISTER_PREFIX "__"
diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index b48ca4b4090..52cdb01a102 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -68,6 +68,10 @@
"s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", \
"s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", \
"s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", \
+ "d16", "?16", "d17", "?17", "d18", "?18", "d19", "?19", \
+ "d20", "?20", "d21", "?21", "d22", "?22", "d23", "?23", \
+ "d24", "?24", "d25", "?25", "d26", "?26", "d27", "?27", \
+ "d28", "?28", "d29", "?29", "d30", "?30", "d31", "?31", \
"vfpcc" \
}
#endif
@@ -158,22 +162,30 @@
{"mvdx13", 40}, \
{"mvdx14", 41}, \
{"mvdx15", 42}, \
- {"d0", 63}, \
+ {"d0", 63}, {"q0", 63}, \
{"d1", 65}, \
- {"d2", 67}, \
+ {"d2", 67}, {"q1", 67}, \
{"d3", 69}, \
- {"d4", 71}, \
+ {"d4", 71}, {"q2", 71}, \
{"d5", 73}, \
- {"d6", 75}, \
+ {"d6", 75}, {"q3", 75}, \
{"d7", 77}, \
- {"d8", 79}, \
+ {"d8", 79}, {"q4", 79}, \
{"d9", 81}, \
- {"d10", 83}, \
+ {"d10", 83}, {"q5", 83}, \
{"d11", 85}, \
- {"d12", 87}, \
+ {"d12", 87}, {"q6", 87}, \
{"d13", 89}, \
- {"d14", 91}, \
+ {"d14", 91}, {"q7", 91}, \
{"d15", 93}, \
+ {"q8", 95}, \
+ {"q9", 99}, \
+ {"q10", 103}, \
+ {"q11", 107}, \
+ {"q12", 111}, \
+ {"q13", 115}, \
+ {"q14", 119}, \
+ {"q15", 123} \
}
#endif
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 7af2b94fe2b..7eb85378a48 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -68,11 +68,21 @@ extern rtx thumb_legitimize_reload_address (rtx *, enum machine_mode, int, int,
int);
extern int arm_const_double_rtx (rtx);
extern int neg_const_double_rtx_ok_for_fpa (rtx);
+extern int vfp3_const_double_rtx (rtx);
+extern int neon_immediate_valid_for_move (rtx, enum machine_mode, rtx *, int *);
+extern int neon_immediate_valid_for_logic (rtx, enum machine_mode, int, rtx *,
+ int *);
+extern char *neon_output_logic_immediate (const char *, rtx *,
+ enum machine_mode, int, int);
+extern void neon_pairwise_reduce (rtx, rtx, enum machine_mode,
+ rtx (*) (rtx, rtx, rtx));
+extern void neon_expand_vector_init (rtx, rtx);
extern enum reg_class vfp_secondary_reload_class (enum machine_mode, rtx);
extern bool arm_tls_referenced_p (rtx);
extern int cirrus_memory_offset (rtx);
extern int arm_coproc_mem_operand (rtx, bool);
+extern int neon_vector_mem_operand (rtx, bool);
extern int arm_no_early_store_addr_dep (rtx, rtx);
extern int arm_no_early_alu_shift_dep (rtx, rtx);
extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
@@ -110,7 +120,9 @@ extern const char *output_mov_long_double_arm_from_arm (rtx *);
extern const char *output_mov_double_fpa_from_arm (rtx *);
extern const char *output_mov_double_arm_from_fpa (rtx *);
extern const char *output_move_double (rtx *);
+extern const char *output_move_quad (rtx *);
extern const char *output_move_vfp (rtx *operands);
+extern const char *output_move_neon (rtx *operands);
extern const char *output_add_immediate (rtx *);
extern const char *arithmetic_instr (rtx, int);
extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 488991ebefa..0880b58a0cc 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -452,6 +452,8 @@ static int thumb_call_reg_needed;
#define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
profile. */
#define FL_DIV (1 << 18) /* Hardware divide. */
+#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
+#define FL_NEON (1 << 20) /* Neon instructions. */
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
@@ -695,7 +697,9 @@ static const struct fpu_desc all_fpus[] =
{"fpe2", FPUTYPE_FPA_EMU2},
{"fpe3", FPUTYPE_FPA_EMU2},
{"maverick", FPUTYPE_MAVERICK},
- {"vfp", FPUTYPE_VFP}
+ {"vfp", FPUTYPE_VFP},
+ {"vfp3", FPUTYPE_VFP3},
+ {"neon", FPUTYPE_NEON}
};
@@ -710,7 +714,9 @@ static const enum fputype fp_model_for_fpu[] =
ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
- ARM_FP_MODEL_VFP /* FPUTYPE_VFP */
+ ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
+ ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
+ ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
};
@@ -2633,15 +2639,20 @@ arm_return_in_memory (tree type)
{
HOST_WIDE_INT size;
+ size = int_size_in_bytes (type);
+
+ /* Vector values should be returned using ARM registers, not memory (unless
+ they're over 16 bytes, which will break since we only have four
+ call-clobbered registers to play with). */
+ if (TREE_CODE (type) == VECTOR_TYPE && size <= 16)
+ return 0;
+
if (!AGGREGATE_TYPE_P (type) &&
- (TREE_CODE (type) != VECTOR_TYPE) &&
!(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
/* All simple types are returned in registers.
For AAPCS, complex types are treated the same as aggregates. */
return 0;
- size = int_size_in_bytes (type);
-
if (arm_abi != ARM_ABI_APCS)
{
/* ATPCS and later return aggregate types in memory only if they are
@@ -2882,7 +2893,7 @@ arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
{
int nregs = pcum->nregs;
- if (arm_vector_mode_supported_p (mode))
+ if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
return 0;
if (NUM_ARG_REGS > nregs
@@ -4869,7 +4880,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
return 6;
case CONST_DOUBLE:
- if (arm_const_double_rtx (x))
+ if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
return outer == SET ? 2 : -1;
else if ((outer == COMPARE || outer == PLUS)
&& neg_const_double_rtx_ok_for_fpa (x))
@@ -5568,6 +5579,458 @@ neg_const_double_rtx_ok_for_fpa (rtx x)
return 0;
}
+
+/* VFPv3 has a fairly wide range of representable immediates, formed from
+ "quarter-precision" floating-point values. These can be evaluated using this
+ formula (with ^ for exponentiation):
+
+ -1^s * n * 2^-r
+
+ Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
+ 16 <= n <= 31 and 0 <= r <= 7.
+
+ These values are mapped onto an 8-bit integer ABCDEFGH s.t.
+
+ - A (most-significant) is the sign bit.
+ - BCD are the exponent (encoded as r XOR 3).
+ - EFGH are the mantissa (encoded as n - 16).
+*/
+
+/* Return an integer index for a VFPv3 immediate operand X suitable for the
+ fconst[sd] instruction, or -1 if X isn't suitable. */
+static int
+vfp3_const_double_index (rtx x)
+{
+ REAL_VALUE_TYPE r, m;
+ int sign, exponent;
+ unsigned HOST_WIDE_INT mantissa, mant_hi;
+ unsigned HOST_WIDE_INT mask;
+ int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
+
+ if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
+ return -1;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+
+ /* We can't represent these things, so detect them first. */
+ if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
+ return -1;
+
+ /* Extract sign, exponent and mantissa. */
+ sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
+ r = REAL_VALUE_ABS (r);
+ exponent = REAL_EXP (&r);
+ /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
+ highest (sign) bit, with a fixed binary point at bit point_pos.
+ WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
+ bits for the mantissa, this may fail (low bits would be lost). */
+ real_ldexp (&m, &r, point_pos - exponent);
+ REAL_VALUE_TO_INT (&mantissa, &mant_hi, m);
+
+ /* If there are bits set in the low part of the mantissa, we can't
+ represent this value. */
+ if (mantissa != 0)
+ return -1;
+
+ /* Now make it so that mantissa contains the most-significant bits, and move
+ the point_pos to indicate that the least-significant bits have been
+ discarded. */
+ point_pos -= HOST_BITS_PER_WIDE_INT;
+ mantissa = mant_hi;
+
+ /* We can permit four significant bits of mantissa only, plus a high bit
+ which is always 1. */
+ mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
+ if ((mantissa & mask) != 0)
+ return -1;
+
+ /* Now we know the mantissa is in range, chop off the unneeded bits. */
+ mantissa >>= point_pos - 5;
+
+ /* The mantissa may be zero. Disallow that case. (It's possible to load the
+ floating-point immediate zero with Neon using an integer-zero load, but
+ that case is handled elsewhere.) */
+ if (mantissa == 0)
+ return -1;
+
+ gcc_assert (mantissa >= 16 && mantissa <= 31);
+
+ /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
+ normalised significands are in the range [1, 2). (Our mantissa is shifted
+ left 4 places at this point relative to normalised IEEE754 values). GCC
+ internally uses [0.5, 1) (see real.c), so the exponent returned from
+ REAL_EXP must be altered. */
+ exponent = 5 - exponent;
+
+ if (exponent < 0 || exponent > 7)
+ return -1;
+
+ /* Sign, mantissa and exponent are now in the correct form to plug into the
+ formulae described in the comment above. */
+ return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
+}
+
+/* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
+int
+vfp3_const_double_rtx (rtx x)
+{
+ if (!TARGET_VFP3)
+ return 0;
+
+ return vfp3_const_double_index (x) != -1;
+}
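
To make the quarter-precision encoding above concrete, here is a minimal
standalone C sketch (illustrative only, not part of the patch; all names
are made up) that decodes an 8-bit ABCDEFGH immediate back into the value
-1^s * n * 2^-r described in the comment:

#include <math.h>
#include <stdio.h>

/* Decode an 8-bit VFPv3 immediate: bit 7 is the sign, bits 6:4 hold
   r XOR 3, bits 3:0 hold n - 16 (so 16 <= n <= 31, 0 <= r <= 7).  */
static double
vfp3_imm_decode (unsigned char imm8)
{
  int s = (imm8 >> 7) & 1;
  int r = ((imm8 >> 4) & 7) ^ 3;
  int n = 16 + (imm8 & 15);
  return (s ? -1.0 : 1.0) * n * pow (2.0, -r);
}

int
main (void)
{
  /* 0x70: s = 0, r = 7 ^ 3 = 4, n = 16, so 16 * 2^-4 = 1.0.  */
  printf ("%g\n", vfp3_imm_decode (0x70));
  return 0;
}

The representable values thus run from 16/128 = 0.125 up to 31, in both
signs, which is exactly the range vfp3_const_double_index accepts.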
+
+/* Recognize immediates which can be used in various Neon instructions. Legal
+ immediates are described by the following table (for VMVN variants, the
+ bitwise inverse of the constant shown is recognized; in either case, VMOV
+ is output and the correct instruction to use for a given constant is chosen
+ by the assembler). The constant shown is replicated across all elements of
+ the destination vector.
+
+ insn elems variant constant (binary)
+ ---- ----- ------- -----------------
+ vmov i32 0 00000000 00000000 00000000 abcdefgh
+ vmov i32 1 00000000 00000000 abcdefgh 00000000
+ vmov i32 2 00000000 abcdefgh 00000000 00000000
+ vmov i32 3 abcdefgh 00000000 00000000 00000000
+ vmov i16 4 00000000 abcdefgh
+ vmov i16 5 abcdefgh 00000000
+ vmvn i32 6 00000000 00000000 00000000 abcdefgh
+ vmvn i32 7 00000000 00000000 abcdefgh 00000000
+ vmvn i32 8 00000000 abcdefgh 00000000 00000000
+ vmvn i32 9 abcdefgh 00000000 00000000 00000000
+ vmvn i16 10 00000000 abcdefgh
+ vmvn i16 11 abcdefgh 00000000
+ vmov i32 12 00000000 00000000 abcdefgh 11111111
+ vmvn i32 13 00000000 00000000 abcdefgh 11111111
+ vmov i32 14 00000000 abcdefgh 11111111 11111111
+ vmvn i32 15 00000000 abcdefgh 11111111 11111111
+ vmov i8 16 abcdefgh
+ vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
+ eeeeeeee ffffffff gggggggg hhhhhhhh
+ vmov f32 18 aBbbbbbc defgh000 00000000 00000000
+
+ For case 18, B = !b. Representable values are exactly those accepted by
+ vfp3_const_double_index, but are output as floating-point numbers rather
+ than indices.
+
+ Variants 0-5 (inclusive) may also be used as immediates for the second
+ operand of VORR/VBIC instructions.
+
+ The INVERSE argument causes the bitwise inverse of the given operand to be
+ recognized instead (used for recognizing legal immediates for the VAND/VORN
+ pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
+ *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
+ output, rather than the real insns vbic/vorr).
+
+ INVERSE makes no difference to the recognition of float vectors.
+
+ The return value is the variant of immediate as shown in the above table, or
+ -1 if the given value doesn't match any of the listed patterns.
+*/
+static int
+neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
+ rtx *modconst, int *elementwidth)
+{
+#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
+ matches = 1; \
+ for (i = 0; i < idx; i += (STRIDE)) \
+ if (!(TEST)) \
+ matches = 0; \
+ if (matches) \
+ { \
+ immtype = (CLASS); \
+ elsize = (ELSIZE); \
+ break; \
+ }
+
+ unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
+ unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
+ unsigned char bytes[16];
+ int immtype = -1, matches;
+ unsigned int invmask = inverse ? 0xff : 0;
+
+ /* Vectors of float constants. */
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ {
+ rtx el0 = CONST_VECTOR_ELT (op, 0);
+ REAL_VALUE_TYPE r0;
+
+ if (!vfp3_const_double_rtx (el0))
+ return -1;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
+
+ for (i = 1; i < n_elts; i++)
+ {
+ rtx elt = CONST_VECTOR_ELT (op, i);
+ REAL_VALUE_TYPE re;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
+
+ if (!REAL_VALUES_EQUAL (r0, re))
+ return -1;
+ }
+
+ if (modconst)
+ *modconst = CONST_VECTOR_ELT (op, 0);
+
+ if (elementwidth)
+ *elementwidth = 0;
+
+ return 18;
+ }
+
+ /* Splat vector constant out into a byte vector. */
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx el = CONST_VECTOR_ELT (op, i);
+ unsigned HOST_WIDE_INT elpart;
+ unsigned int part, parts;
+
+ if (GET_CODE (el) == CONST_INT)
+ {
+ elpart = INTVAL (el);
+ parts = 1;
+ }
+ else if (GET_CODE (el) == CONST_DOUBLE)
+ {
+ elpart = CONST_DOUBLE_LOW (el);
+ parts = 2;
+ }
+ else
+ gcc_unreachable ();
+
+ for (part = 0; part < parts; part++)
+ {
+ unsigned int byte;
+ for (byte = 0; byte < innersize; byte++)
+ {
+ bytes[idx++] = (elpart & 0xff) ^ invmask;
+ elpart >>= BITS_PER_UNIT;
+ }
+ if (GET_CODE (el) == CONST_DOUBLE)
+ elpart = CONST_DOUBLE_HIGH (el);
+ }
+ }
+
+ /* Sanity check. */
+ gcc_assert (idx == GET_MODE_SIZE (mode));
+
+ do
+ {
+ CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
+ && bytes[i + 2] == 0 && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0 && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
+ && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
+
+ CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
+
+ CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
+
+ CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
+ && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
+ && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
+
+ CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
+
+ CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
+
+ CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0 && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+ && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
+
+ CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
+
+ CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
+ && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
+
+ CHECK (1, 8, 16, bytes[i] == bytes[0]);
+
+ CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
+ && bytes[i] == bytes[(i + 8) % idx]);
+ }
+ while (0);
+
+ if (immtype == -1)
+ return -1;
+
+ if (elementwidth)
+ *elementwidth = elsize;
+
+ if (modconst)
+ {
+ unsigned HOST_WIDE_INT imm = 0;
+
+ /* Un-invert bytes of recognized vector, if necessary. */
+ if (invmask != 0)
+ for (i = 0; i < idx; i++)
+ bytes[i] ^= invmask;
+
+ if (immtype == 17)
+ {
+ /* FIXME: Broken on 32-bit H_W_I hosts. */
+ gcc_assert (sizeof (HOST_WIDE_INT) == 8);
+
+ for (i = 0; i < 8; i++)
+ imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
+ << (i * BITS_PER_UNIT);
+
+ *modconst = GEN_INT (imm);
+ }
+ else
+ {
+ unsigned HOST_WIDE_INT imm = 0;
+
+ for (i = 0; i < elsize / BITS_PER_UNIT; i++)
+ imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
+
+ *modconst = GEN_INT (imm);
+ }
+ }
+
+ return immtype;
+#undef CHECK
+}
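
The matching strategy above -- splat the candidate constant into a byte
array, then test each table row with its stride -- can be shown with a
simplified standalone sketch covering just variants 0 and 16 (names here
are illustrative, not part of the patch):

#include <stdio.h>

/* Simplified model of the CHECK loop in neon_valid_immediate: bytes[]
   holds the candidate constant splatted out byte-by-byte.  */
static int
classify_bytes (const unsigned char bytes[16])
{
  int i, matches;

  /* Variant 0 (vmov.i32): each 4-byte group is { bytes[0], 0, 0, 0 }.  */
  matches = 1;
  for (i = 0; i < 16; i += 4)
    if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
          && bytes[i + 2] == 0 && bytes[i + 3] == 0))
      matches = 0;
  if (matches)
    return 0;

  /* Variant 16 (vmov.i8): every byte is the same.  */
  matches = 1;
  for (i = 0; i < 16; i++)
    if (bytes[i] != bytes[0])
      matches = 0;
  if (matches)
    return 16;

  return -1;
}

int
main (void)
{
  unsigned char splat[16];
  int i;
  for (i = 0; i < 16; i++)
    splat[i] = (i % 4 == 0) ? 0xab : 0;  /* 0x000000ab in each i32 lane */
  printf ("%d\n", classify_bytes (splat));  /* prints 0 */
  return 0;
}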
+
+/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
+ VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
+ float elements), and a modified constant (whatever should be output for a
+ VMOV) in *MODCONST. */
+
+int
+neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
+ rtx *modconst, int *elementwidth)
+{
+ rtx tmpconst;
+ int tmpwidth;
+ int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
+
+ if (retval == -1)
+ return 0;
+
+ if (modconst)
+ *modconst = tmpconst;
+
+ if (elementwidth)
+ *elementwidth = tmpwidth;
+
+ return 1;
+}
+
+/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
+ the immediate is valid, write a constant suitable for using as an operand
+ to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
+ *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
+
+int
+neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
+ rtx *modconst, int *elementwidth)
+{
+ rtx tmpconst;
+ int tmpwidth;
+ int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
+
+ if (retval < 0 || retval > 5)
+ return 0;
+
+ if (modconst)
+ *modconst = tmpconst;
+
+ if (elementwidth)
+ *elementwidth = tmpwidth;
+
+ return 1;
+}
+
+/* Return a string suitable for output of Neon immediate logic operation
+ MNEM. */
+
+char *
+neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
+ int inverse, int quad)
+{
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
+
+ gcc_assert (is_valid != 0);
+
+ if (quad)
+ sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
+ else
+ sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
+
+ return templ;
+}
+
+/* Output a sequence of pairwise operations to implement a reduction.
+ NOTE: We do "too much work" here, because pairwise operations work on two
+ registers-worth of operands in one go. Unfortunately we don't think we can
+ exploit those extra calculations to do the full operation in fewer steps.
+ Although all vector elements of the result but the first are ignored, we
+ actually calculate the same result in each of the elements. An alternative
+ such as initially loading a vector with zero to use as each of the second
+ operands would use up an additional register and take an extra instruction,
+ for no particular gain. */
+
+void
+neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
+ rtx (*reduc) (rtx, rtx, rtx))
+{
+ enum machine_mode inner = GET_MODE_INNER (mode);
+ unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
+ rtx tmpsum = op1;
+
+ for (i = parts / 2; i >= 1; i /= 2)
+ {
+ rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
+ emit_insn (reduc (dest, tmpsum, tmpsum));
+ tmpsum = dest;
+ }
+}
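
A scalar model of that halving loop (illustrative, not part of the patch):
each emitted pairwise operation adds adjacent element pairs of the
concatenated (tmpsum, tmpsum) operands, so after log2(parts) steps every
element, including element 0, holds the full sum.

#include <stdio.h>

/* Model one 'reduc (dest, v, v)' step per iteration: element j of the
   result is v[(2j) mod parts] + v[(2j+1) mod parts].  */
static void
pairwise_reduce_model (int *v, int parts)
{
  int i, j, tmp[16];

  for (i = parts / 2; i >= 1; i /= 2)
    {
      for (j = 0; j < parts; j++)
        tmp[j] = v[(2 * j) % parts] + v[(2 * j + 1) % parts];
      for (j = 0; j < parts; j++)
        v[j] = tmp[j];
    }
}

int
main (void)
{
  int v[4] = { 1, 2, 3, 4 };
  pairwise_reduce_model (v, 4);
  printf ("%d\n", v[0]);  /* prints 10 */
  return 0;
}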
+
+/* Initialise a vector with non-constant elements. FIXME: We can do better
+ than the current implementation (building a vector on the stack and then
+ loading it) in many cases. See rs6000.c. */
+
+void
+neon_expand_vector_init (rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner = GET_MODE_INNER (mode);
+ unsigned int i, n_elts = GET_MODE_NUNITS (mode);
+ rtx mem;
+
+ gcc_assert (VECTOR_MODE_P (mode));
+
+ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
+ for (i = 0; i < n_elts; i++)
+ emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
+ XVECEXP (vals, 0, i));
+
+ emit_move_insn (target, mem);
+}
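
The stack-temporary strategy is the classic fallback: store each scalar
element into a correctly laid-out memory slot, then issue one whole-vector
load. A hedged scalar model (the types and names below are made up for
illustration):

#include <string.h>

typedef struct { int e[4]; } v4si_model;  /* stand-in for a V4SI register */

/* Model of neon_expand_vector_init: per-element scalar stores into a
   stack slot (the assign_stack_temp analogue), then one vector move.  */
v4si_model
init_from_elements (const int vals[4])
{
  int slot[4];
  v4si_model target;
  int i;

  for (i = 0; i < 4; i++)
    slot[i] = vals[i];              /* emit_move_insn per element */
  memcpy (&target, slot, sizeof target);  /* vector load from the slot */
  return target;
}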
+
/* Predicates for `match_operand' and `match_operator'. */
@@ -5679,6 +6142,72 @@ arm_coproc_mem_operand (rtx op, bool wb)
return FALSE;
}
+/* Return TRUE if OP is a memory operand which we can load or store a vector
+ to/from. If CORE is true, we're moving from ARM registers not Neon
+ registers. */
+int
+neon_vector_mem_operand (rtx op, bool core)
+{
+ rtx ind;
+
+ /* Reject eliminable registers. */
+ if (! (reload_in_progress || reload_completed)
+ && ( reg_mentioned_p (frame_pointer_rtx, op)
+ || reg_mentioned_p (arg_pointer_rtx, op)
+ || reg_mentioned_p (virtual_incoming_args_rtx, op)
+ || reg_mentioned_p (virtual_outgoing_args_rtx, op)
+ || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
+ || reg_mentioned_p (virtual_stack_vars_rtx, op)))
+ return FALSE;
+
+ /* Constants are converted into offsets from labels. */
+ if (GET_CODE (op) != MEM)
+ return FALSE;
+
+ ind = XEXP (op, 0);
+
+ if (reload_completed
+ && (GET_CODE (ind) == LABEL_REF
+ || (GET_CODE (ind) == CONST
+ && GET_CODE (XEXP (ind, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
+ && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
+ return TRUE;
+
+ /* Match: (mem (reg)). */
+ if (GET_CODE (ind) == REG)
+ return arm_address_register_rtx_p (ind, 0);
+
+ /* Allow post-increment with Neon registers. */
+ if (!core && GET_CODE (ind) == POST_INC)
+ return arm_address_register_rtx_p (XEXP (ind, 0), 0);
+
+#if 0
+ /* FIXME: We can support this too if we use VLD1/VST1. */
+ if (!core
+ && GET_CODE (ind) == POST_MODIFY
+ && arm_address_register_rtx_p (XEXP (ind, 0), 0)
+ && GET_CODE (XEXP (ind, 1)) == PLUS
+ && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
+ ind = XEXP (ind, 1);
+#endif
+
+ /* Match:
+ (plus (reg)
+ (const)). */
+ if (!core
+ && GET_CODE (ind) == PLUS
+ && GET_CODE (XEXP (ind, 0)) == REG
+ && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
+ && GET_CODE (XEXP (ind, 1)) == CONST_INT
+ && INTVAL (XEXP (ind, 1)) > -1024
+ && INTVAL (XEXP (ind, 1)) < 1016
+ && (INTVAL (XEXP (ind, 1)) & 3) == 0)
+ return TRUE;
+
+ return FALSE;
+}
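
The PLUS case accepts reg+constant addresses whose offset fits the
doubleword load/store range; the INTVAL tests reduce to this small
(illustrative) predicate:

/* Word-aligned offsets in (-1024, 1016), matching the PLUS case of
   neon_vector_mem_operand above.  */
int
neon_offset_ok (long offset)
{
  return offset > -1024 && offset < 1016 && (offset & 3) == 0;
}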
+
/* Return true if X is a register that will be eliminated later on. */
int
arm_eliminable_register (rtx x)
@@ -5695,6 +6224,12 @@ arm_eliminable_register (rtx x)
enum reg_class
vfp_secondary_reload_class (enum machine_mode mode, rtx x)
{
+ if (TARGET_NEON
+ && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ && neon_vector_mem_operand (x, FALSE))
+ return NO_REGS;
+
if (arm_coproc_mem_operand (x, FALSE) || s_register_operand (x, mode))
return NO_REGS;
@@ -7767,8 +8302,8 @@ add_minipool_forward_ref (Mfix *fix)
placed at the start of the pool. */
if (ARM_DOUBLEWORD_ALIGN
&& max_mp == NULL
- && fix->fix_size == 8
- && mp->fix_size != 8)
+ && fix->fix_size >= 8
+ && mp->fix_size < 8)
{
max_mp = mp;
max_address = mp->max_address;
@@ -7948,7 +8483,7 @@ add_minipool_backward_ref (Mfix *fix)
/* For now, we do not allow the insertion of 8-byte alignment
requiring nodes anywhere but at the start of the pool. */
if (ARM_DOUBLEWORD_ALIGN
- && fix->fix_size == 8 && mp->fix_size != 8)
+ && fix->fix_size >= 8 && mp->fix_size < 8)
return NULL;
else
min_mp = mp;
@@ -7969,7 +8504,7 @@ add_minipool_backward_ref (Mfix *fix)
placed at the start of the pool. */
else if (ARM_DOUBLEWORD_ALIGN
&& min_mp == NULL
- && fix->fix_size == 8
+ && fix->fix_size >= 8
&& mp->fix_size < 8)
{
min_mp = mp;
@@ -8067,7 +8602,7 @@ dump_minipool (rtx scan)
if (ARM_DOUBLEWORD_ALIGN)
for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
- if (mp->refcount > 0 && mp->fix_size == 8)
+ if (mp->refcount > 0 && mp->fix_size >= 8)
{
align64 = 1;
break;
@@ -8122,6 +8657,12 @@ dump_minipool (rtx scan)
break;
#endif
+#ifdef HAVE_consttable_16
+ case 16:
+ scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
+ break;
+
+#endif
default:
gcc_unreachable ();
}
@@ -8314,7 +8855,7 @@ push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
/* If an entry requires 8-byte alignment then assume all constant pools
require 4 bytes of padding. Trying to do this later on a per-pool
basis is awkward because existing pool entries have to be modified. */
- if (ARM_DOUBLEWORD_ALIGN && fix->fix_size == 8)
+ if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
minipool_pad = 4;
if (dump_file)
@@ -8730,6 +9271,17 @@ arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
count++;
}
+ /* FLDMX may not load more than 16 doubleword registers at a time. Split the
+ load into multiple parts if we have to handle more than 16 registers.
+ FIXME: This will increase the maximum size of the epilogue, which will
+ need altering elsewhere. */
+ if (count > 16)
+ {
+ arm_output_fldmx (stream, base, reg, 16);
+ arm_output_fldmx (stream, base, reg + 16, count - 16);
+ return;
+ }
+
fputc ('\t', stream);
asm_fprintf (stream, "fldmfd%c\t%r!, {", TARGET_FLDMX ? 'x' : 'd', base);
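
Both the load and store paths now cap each transfer at 16 doubleword
registers. An iterative sketch of the same splitting (the emitter below is
a stand-in for the real assembly output, and the register-list syntax is
simplified):

#include <stdio.h>

/* Stand-in for the real FLDMX output code.  */
static void
emit_fldmx (int base, int reg, int count)
{
  printf ("fldmfdx\tr%d!, {d%d-d%d}\n", base, reg, reg + count - 1);
}

/* Equivalent of the recursive split in arm_output_fldmx: at most 16
   doubleword registers per instruction, lowest-numbered group first.  */
static void
emit_fldmx_split (int base, int reg, int count)
{
  while (count > 16)
    {
      emit_fldmx (base, reg, 16);
      reg += 16;
      count -= 16;
    }
  if (count > 0)
    emit_fldmx (base, reg, count);
}

int
main (void)
{
  emit_fldmx_split (13, 0, 20);  /* restores d0-d15, then d16-d19 */
  return 0;
}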
@@ -8793,6 +9345,19 @@ vfp_emit_fstmx (int base_reg, int count)
count++;
}
+ /* FSTMX may not store more than 16 doubleword registers at once. Split
+ larger stores into multiple parts (up to a maximum of two, in
+ practice). */
+ if (count > 16)
+ {
+ int saved;
+ /* FIXME: When ARMv6 support is added to this function, the padding used
+ by f{ldm,stm}x may need attention here. */
+ saved = vfp_emit_fstmx (base_reg + 16, count - 16);
+ saved += vfp_emit_fstmx (base_reg, 16);
+ return saved;
+ }
+
/* ??? The frame layout is implementation defined. We describe
standard format 1 (equivalent to a FSTMD insn and unused pad word)
for architectures pre-v6, and FSTMD insn format without the pad
@@ -9279,6 +9844,85 @@ output_move_double (rtx *operands)
return "";
}
+/* Output a move, load or store for quad-word vectors in ARM registers. Only
+ handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
+
+const char *
+output_move_quad (rtx *operands)
+{
+ if (REG_P (operands[0]))
+ {
+ /* Load, or reg->reg move. */
+
+ if (MEM_P (operands[1]))
+ {
+ switch (GET_CODE (XEXP (operands[1], 0)))
+ {
+ case REG:
+ output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
+ break;
+
+ case LABEL_REF:
+ case CONST:
+ output_asm_insn ("adr%?\t%0, %1", operands);
+ output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ rtx ops[2];
+ int dest, src, i;
+
+ gcc_assert (REG_P (operands[1]));
+
+ dest = REGNO (operands[0]);
+ src = REGNO (operands[1]);
+
+ /* This seems pretty dumb, but hopefully GCC won't try to do it
+ very often. */
+ if (dest < src)
+ for (i = 0; i < 4; i++)
+ {
+ ops[0] = gen_rtx_REG (SImode, dest + i);
+ ops[1] = gen_rtx_REG (SImode, src + i);
+ output_asm_insn ("mov%?\t%0, %1", ops);
+ }
+ else
+ for (i = 3; i >= 0; i--)
+ {
+ ops[0] = gen_rtx_REG (SImode, dest + i);
+ ops[1] = gen_rtx_REG (SImode, src + i);
+ output_asm_insn ("mov%?\t%0, %1", ops);
+ }
+ }
+ }
+ else
+ {
+ int i, regno;
+ rtx ops[3];
+
+ gcc_assert (MEM_P (operands[0]));
+ gcc_assert (REG_P (operands[1]));
+ gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
+
+ switch (GET_CODE (XEXP (operands[0], 0)))
+ {
+ case REG:
+ output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ return "";
+}
+
/* Output a VFP load or store instruction. */
const char *
@@ -9290,16 +9934,20 @@ output_move_vfp (rtx *operands)
int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
const char *template;
char buff[50];
+ enum machine_mode mode;
reg = operands[!load];
mem = operands[load];
+ mode = GET_MODE (reg);
+
gcc_assert (REG_P (reg));
gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
- gcc_assert (GET_MODE (reg) == SFmode
- || GET_MODE (reg) == DFmode
- || GET_MODE (reg) == SImode
- || GET_MODE (reg) == DImode);
+ gcc_assert (mode == SFmode
+ || mode == DFmode
+ || mode == SImode
+ || mode == DImode
+ || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
gcc_assert (MEM_P (mem));
addr = XEXP (mem, 0);
@@ -9335,6 +9983,112 @@ output_move_vfp (rtx *operands)
return "";
}
+/* Output a Neon quad-word load or store. We could also support post-modify
+ forms using VLD1/VST1, but we don't do that yet.
+ WARNING, FIXME: The ordering of elements in memory is going to be weird in
+ big-endian mode at present, because we use VSTM instead of VST1, to make
+ it easy to make vector stores via ARM registers write values in the same
+ order as stores direct from Neon registers. For example, the byte ordering
+ of a quadword vector with 16-bit elements like this:
+
+ [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
+
+ will be (with lowest address first, h = most-significant byte,
+ l = least-significant byte of element):
+
+ [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
+ e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
+
+ When necessary, quadword registers (dN, dN+1) are moved to ARM registers
+ starting at rN, in the order:
+
+ dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
+
+ So that STM/LDM can be used on vectors in ARM registers, and the same memory
+ layout will result as if VSTM/VLDM were used.
+
+ This memory format (in BE mode) is very likely to change in the future. */
+
+const char *
+output_move_neon (rtx *operands)
+{
+ rtx reg, mem, addr, ops[2];
+ int regno, load = REG_P (operands[0]);
+ const char *template;
+ char buff[50];
+ enum machine_mode mode;
+
+ reg = operands[!load];
+ mem = operands[load];
+
+ mode = GET_MODE (reg);
+
+ gcc_assert (REG_P (reg));
+ regno = REGNO (reg);
+ gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
+ || NEON_REGNO_OK_FOR_QUAD (regno));
+ gcc_assert (VALID_NEON_DREG_MODE (mode)
+ || VALID_NEON_QREG_MODE (mode));
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ switch (GET_CODE (addr))
+ {
+ case POST_INC:
+ /* FIXME: We should be using vld1/vst1 here in BE mode? */
+ template = "v%smia%%?\t%%0!, %%h1";
+ ops[0] = XEXP (addr, 0);
+ ops[1] = reg;
+ break;
+
+ case POST_MODIFY:
+ /* FIXME: Not currently enabled in neon_vector_mem_operand. */
+ gcc_unreachable ();
+
+ case PLUS:
+ {
+ rtx otherops[2];
+ /* We're only using DImode here because it's a convenient size.
+ FIXME: This will need updating if the memory format of vectors
+ changes. */
+ ops[0] = gen_rtx_REG (DImode, REGNO (reg));
+ ops[1] = mem;
+
+ otherops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2);
+ otherops[1] = adjust_address (ops[1], SImode, 8);
+
+ if (reg_overlap_mentioned_p (ops[0], mem))
+ {
+ sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
+ output_asm_insn (buff, otherops);
+ sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
+ output_asm_insn (buff, ops);
+ }
+ else
+ {
+ sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
+ output_asm_insn (buff, ops);
+ sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
+ output_asm_insn (buff, otherops);
+ }
+
+ return "";
+ }
+
+ default:
+ /* FIXME: See POST_INC. */
+ template = "v%smia%%?\t%%m0, %%h1";
+ ops[0] = mem;
+ ops[1] = reg;
+ }
+
+ sprintf (buff, template, load ? "ld" : "st");
+ output_asm_insn (buff, ops);
+
+ return "";
+}
+
/* Output an ADD r, s, #n where n may be too big for one instruction.
If adding zero to one register, output nothing. */
const char *
@@ -9855,7 +10609,10 @@ arm_get_vfp_saved_size (void)
/* Workaround ARM10 VFPr1 bug. */
if (count == 2 && !arm_arch6)
count++;
- saved += count * 8 + space_for_format_word;
+ /* Count extra padding for transfers > 16 words, which are
+ split into multiple parts. */
+ saved += count * 8
+ + ((count + 15) / 16) * space_for_format_word;
}
count = 0;
}
@@ -9866,7 +10623,7 @@ arm_get_vfp_saved_size (void)
{
if (count == 2 && !arm_arch6)
count++;
- saved += count * 8 + space_for_format_word;
+ saved += count * 8 + ((count + 15) / 16) * space_for_format_word;
}
}
return saved;
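
The ((count + 15) / 16) factor counts one FSTMX format word per emitted
transfer, now that transfers of more than 16 registers are split. A worked
example (standalone, for illustration):

#include <stdio.h>

int
main (void)
{
  const int space_for_format_word = 4;  /* one FSTMX pad word */
  int count = 20;                       /* e.g. d0-d19 live across a call */
  /* Two transfers (16 + 4 registers), hence two format words:
     20 * 8 + 2 * 4 = 168 bytes.  */
  int saved = count * 8 + ((count + 15) / 16) * space_for_format_word;
  printf ("%d\n", saved);
  return 0;
}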
@@ -11591,6 +12348,11 @@ arm_print_operand (FILE *stream, rtx x, int code)
}
return;
+ /* An integer without a preceding # sign. */
+ case 'n':
+ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ return;
+
case 'B':
if (GET_CODE (x) == CONST_INT)
{
@@ -11708,6 +12470,26 @@ arm_print_operand (FILE *stream, rtx x, int code)
asm_fprintf (stream, "%r", REGNO (x) + 1);
return;
+ case 'J':
+ if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
+ return;
+
+ case 'K':
+ if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
+ return;
+
case 'm':
asm_fprintf (stream, "%r",
GET_CODE (XEXP (x, 0)) == REG
@@ -11720,6 +12502,19 @@ arm_print_operand (FILE *stream, rtx x, int code)
REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
return;
+ /* Like 'M', but writing doubleword vector registers, for use by Neon
+ insns. */
+ case 'h':
+ {
+ int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
+ int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
+ if (numregs == 1)
+ asm_fprintf (stream, "{d%d}", regno);
+ else
+ asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
+ }
+ return;
+
case 'd':
/* CONST_TRUE_RTX means always -- that's the default. */
if (x == const_true_rtx)
@@ -11832,13 +12627,15 @@ arm_print_operand (FILE *stream, rtx x, int code)
}
return;
- /* Print a VFP double precision register name. */
+ /* Print a VFP/Neon double precision or quad precision register name. */
case 'P':
+ case 'q':
{
int mode = GET_MODE (x);
- int num;
+ int is_quad = (code == 'q');
+ int regno;
- if (mode != DImode && mode != DFmode)
+ if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
{
output_operand_lossage ("invalid operand for code '%c'", code);
return;
@@ -11851,14 +12648,52 @@ arm_print_operand (FILE *stream, rtx x, int code)
return;
}
- num = REGNO(x) - FIRST_VFP_REGNUM;
- if (num & 1)
+ regno = REGNO (x);
+ if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
+ || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
{
output_operand_lossage ("invalid operand for code '%c'", code);
return;
}
- fprintf (stream, "d%d", num >> 1);
+ fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
+ (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
+ }
+ return;
+
+ /* These two codes print the low/high doubleword register of a Neon quad
+ register, respectively. */
+ case 'e':
+ case 'f':
+ {
+ int mode = GET_MODE (x);
+ int regno;
+
+ if (GET_MODE_SIZE (mode) != 16 || GET_CODE (x) != REG)
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ regno = REGNO (x);
+ if (!NEON_REGNO_OK_FOR_QUAD (regno))
+ {
+ output_operand_lossage ("invalid operand for code '%c'", code);
+ return;
+ }
+
+ fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
+ + (code == 'f' ? 1 : 0));
+ }
+ return;
+
+ /* Print a VFPv3 floating-point constant, represented as an integer
+ index. */
+ case 'G':
+ {
+ int index = vfp3_const_double_index (x);
+ gcc_assert (index != -1);
+ fprintf (stream, "%d", index);
}
return;
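
All of these print codes derive the d/q name from the hard register number
by subtracting FIRST_VFP_REGNUM (63, per the arm.h hunk below) and
shifting: one S-register pair per D register, four per Q register. For
instance (a standalone illustration):

#include <stdio.h>

#define FIRST_VFP_REGNUM 63

int
main (void)
{
  int regno = 71;  /* the hard regno that aout.h aliases to "d4"/"q2" */
  printf ("d%d\n", (regno - FIRST_VFP_REGNUM) >> 1);  /* d4 */
  printf ("q%d\n", (regno - FIRST_VFP_REGNUM) >> 2);  /* q2 */
  return 0;
}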
@@ -11881,7 +12716,15 @@ arm_print_operand (FILE *stream, rtx x, int code)
break;
case CONST_DOUBLE:
- fprintf (stream, "#%s", fp_immediate_constant (x));
+ if (TARGET_NEON)
+ {
+ char fpstr[20];
+ real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
+ sizeof (fpstr), 0, 1);
+ fprintf (stream, "#%s", fpstr);
+ }
+ else
+ fprintf (stream, "#%s", fp_immediate_constant (x));
break;
default:
@@ -11899,6 +12742,8 @@ arm_print_operand (FILE *stream, rtx x, int code)
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
+ enum machine_mode mode;
+
if (size == UNITS_PER_WORD && aligned_p)
{
fputs ("\t.word\t", asm_out_file);
@@ -11922,31 +12767,48 @@ arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
return true;
}
- if (arm_vector_mode_supported_p (GET_MODE (x)))
+ mode = GET_MODE (x);
+
+ if (arm_vector_mode_supported_p (mode))
{
int i, units;
+ unsigned int invmask = 0, parts_per_word;
gcc_assert (GET_CODE (x) == CONST_VECTOR);
units = CONST_VECTOR_NUNITS (x);
-
- switch (GET_MODE (x))
- {
- case V2SImode: size = 4; break;
- case V4HImode: size = 2; break;
- case V8QImode: size = 1; break;
- default:
- gcc_unreachable ();
- }
-
- for (i = 0; i < units; i++)
- {
- rtx elt;
-
- elt = CONST_VECTOR_ELT (x, i);
- assemble_integer
- (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
- }
+ size = GET_MODE_SIZE (GET_MODE_INNER (mode));
+
+ /* For big-endian Neon vectors, we must permute the vector to the form
+ which, when loaded by a VLDR or VLDM instruction, will give a vector
+ with the elements in the right order. */
+ if (TARGET_NEON && WORDS_BIG_ENDIAN)
+ {
+ parts_per_word = UNITS_PER_WORD / size;
+ /* FIXME: This might be wrong for 64-bit vector elements, but we don't
+ support those anywhere yet. */
+ invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ for (i = 0; i < units; i++)
+ {
+ rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
+ assemble_integer
+ (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
+ }
+ else
+ for (i = 0; i < units; i++)
+ {
+ rtx elt = CONST_VECTOR_ELT (x, i);
+ REAL_VALUE_TYPE rval;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
+
+ assemble_real
+ (rval, GET_MODE_INNER (mode),
+ i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
+ }
return true;
}
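
The big-endian permutation above works by XOR-ing element indices with
invmask. For 16-bit elements (two parts per 32-bit word) the mask comes out
as 1, which swaps each within-word pair while leaving word order alone; a
tiny standalone demo:

#include <stdio.h>

int
main (void)
{
  const char *elt[4] = { "e0", "e1", "e2", "e3" };  /* a V4HI constant */
  unsigned int parts_per_word = 2;                  /* 16-bit elements */
  unsigned int invmask = (1 << (parts_per_word - 1)) - 1;  /* == 1 */
  unsigned int i;

  for (i = 0; i < 4; i++)
    printf ("%s ", elt[i ^ invmask]);  /* prints: e1 e0 e3 e2 */
  printf ("\n");
  return 0;
}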
@@ -12622,11 +13484,16 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
&& IS_VFP_REGNUM (regno))
{
if (mode == SFmode || mode == SImode)
- return TRUE;
+ return VFP_REGNO_OK_FOR_SINGLE (regno);
- /* DFmode values are only valid in even register pairs. */
if (mode == DFmode)
- return ((regno - FIRST_VFP_REGNUM) & 1) == 0;
+ return VFP_REGNO_OK_FOR_DOUBLE (regno);
+
+ if (TARGET_NEON)
+ return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
+ || (VALID_NEON_QREG_MODE (mode)
+ && NEON_REGNO_OK_FOR_QUAD (regno));
+
return FALSE;
}
@@ -12689,7 +13556,12 @@ arm_regno_class (int regno)
return CIRRUS_REGS;
if (IS_VFP_REGNUM (regno))
- return VFP_REGS;
+ {
+ if (regno >= FIRST_VFP_REGNUM && regno <= LAST_LO_VFP_REGNUM)
+ return VFP_LO_REGS;
+ else
+ return VFP_HI_REGS;
+ }
if (IS_IWMMXT_REGNUM (regno))
return IWMMXT_REGS;
@@ -12871,38 +13743,38 @@ static const struct builtin_description bdesc_2arg[] =
#define IWMMXT_BUILTIN2(code, builtin) \
{ FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
- IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
- IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
- IWMMXT_BUILTIN2 (ashlv4hi3, WSLLHI)
- IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
- IWMMXT_BUILTIN2 (ashlv2si3, WSLLWI)
- IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
- IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
- IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
- IWMMXT_BUILTIN2 (lshrv4hi3, WSRLHI)
- IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
- IWMMXT_BUILTIN2 (lshrv2si3, WSRLWI)
- IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
- IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
- IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
- IWMMXT_BUILTIN2 (ashrv4hi3, WSRAHI)
- IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
- IWMMXT_BUILTIN2 (ashrv2si3, WSRAWI)
- IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
- IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
- IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
- IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
- IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
- IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
- IWMMXT_BUILTIN2 (rordi3_di, WRORD)
- IWMMXT_BUILTIN2 (rordi3, WRORDI)
- IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
- IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
+ IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
+ IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
+ IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
+ IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
+ IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
+ IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
+ IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
+ IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
+ IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
+ IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
+ IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
+ IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
+ IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
+ IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
+ IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
+ IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
+ IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
+ IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
+ IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
+ IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
+ IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
+ IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
+ IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
+ IWMMXT_BUILTIN2 (rordi3_di, WRORD)
+ IWMMXT_BUILTIN2 (rordi3, WRORDI)
+ IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
+ IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};
static const struct builtin_description bdesc_1arg[] =
@@ -15136,6 +16008,7 @@ arm_file_start (void)
}
else
{
+ int set_float_abi_attributes = 0;
switch (arm_fpu_arch)
{
case FPUTYPE_FPA:
@@ -15151,15 +16024,27 @@ arm_file_start (void)
fpu_name = "maverick";
break;
case FPUTYPE_VFP:
- if (TARGET_HARD_FLOAT)
- asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
- if (TARGET_HARD_FLOAT_ABI)
- asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
fpu_name = "vfp";
+ set_float_abi_attributes = 1;
+ break;
+ case FPUTYPE_VFP3:
+ fpu_name = "vfp3";
+ set_float_abi_attributes = 1;
+ break;
+ case FPUTYPE_NEON:
+ fpu_name = "neon";
+ set_float_abi_attributes = 1;
break;
default:
abort();
}
+ if (set_float_abi_attributes)
+ {
+ if (TARGET_HARD_FLOAT)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
+ if (TARGET_HARD_FLOAT_ABI)
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
+ }
}
asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
@@ -16004,6 +16889,11 @@ thumb_set_return_address (rtx source, rtx scratch)
bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
+ /* Neon also supports V2SImode, etc. listed in the clause below. */
+ if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
+ || mode == V16QImode || mode == V4SFmode))
+ return true;
+
if ((mode == V2SImode)
|| (mode == V4HImode)
|| (mode == V8QImode))
@@ -16037,6 +16927,7 @@ arm_dbx_register_number (unsigned int regno)
if (IS_FPA_REGNUM (regno))
return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
+ /* FIXME: VFPv3 register numbering. */
if (IS_VFP_REGNUM (regno))
return 64 + regno - FIRST_VFP_REGNUM;
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index fe494dbeab4..185ae654dd1 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -204,6 +204,15 @@ extern GTY(()) rtx aof_pic_label;
/* 32-bit Thumb-2 code. */
#define TARGET_THUMB2 (TARGET_THUMB && arm_arch_thumb2)
+/* FPU is VFPv3 (with twice the number of D registers). Setting the FPU to
+ Neon automatically enables VFPv3 too. */
+#define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \
+ && (arm_fpu_arch == FPUTYPE_VFP3 \
+ || arm_fpu_arch == FPUTYPE_NEON))
+/* FPU supports Neon instructions. */
+#define TARGET_NEON (arm_fp_model == ARM_FP_MODEL_VFP \
+ && arm_fpu_arch == FPUTYPE_NEON)
+
/* "DSP" multiply instructions, eg. SMULxy. */
#define TARGET_DSP_MULTIPLY \
(TARGET_32BIT && arm_arch5e && arm_arch_notm)
@@ -218,7 +227,7 @@ extern GTY(()) rtx aof_pic_label;
/* True if FLDMX and FSTMX instructions must be used in function prologues
and epilogues rather than FLDMD and FSTMD instructions.
(This does not affect use of FLDMD and FSTMD anywhere else.) */
-#define TARGET_FLDMX !arm_arch6
+#define TARGET_FLDMX (!arm_arch6 && !TARGET_VFP3)
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
then TARGET_AAPCS_BASED must be true -- but the converse does not
@@ -277,7 +286,11 @@ enum fputype
/* Cirrus Maverick floating point co-processor. */
FPUTYPE_MAVERICK,
/* VFP. */
- FPUTYPE_VFP
+ FPUTYPE_VFP,
+ /* VFPv3. */
+ FPUTYPE_VFP3,
+ /* Neon. */
+ FPUTYPE_NEON
};
/* Recast the floating point class to be the floating point attribute. */
@@ -478,6 +491,12 @@ extern int arm_arch_hwdiv;
#define UNITS_PER_WORD 4
+/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
+ registers when autovectorizing for Neon, at least until multiple vector
+ widths are supported properly by the middle-end. */
+#define UNITS_PER_SIMD_WORD \
+ (TARGET_NEON ? (TARGET_NEON_VECTORIZE_QUAD ? 16 : 8) : UNITS_PER_WORD)
+
/* True if natural alignment is used for doubleword types. */
#define ARM_DOUBLEWORD_ALIGN TARGET_AAPCS_BASED
@@ -644,6 +663,10 @@ extern int arm_structure_size_boundary;
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
1 \
}
@@ -670,6 +693,10 @@ extern int arm_structure_size_boundary;
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
+ 1,1,1,1,1,1,1,1, \
1 \
}
@@ -721,6 +748,9 @@ extern int arm_structure_size_boundary;
} \
if (TARGET_VFP) \
{ \
+ /* VFPv3 registers are disabled when earlier VFP \
+ versions are selected due to the definition of \
+ LAST_VFP_REGNUM. */ \
for (regno = FIRST_VFP_REGNUM; \
regno <= LAST_VFP_REGNUM; ++ regno) \
{ \
@@ -899,15 +929,35 @@ extern int arm_structure_size_boundary;
(((REGNUM) >= FIRST_CIRRUS_FP_REGNUM) && ((REGNUM) <= LAST_CIRRUS_FP_REGNUM))
#define FIRST_VFP_REGNUM 63
-#define LAST_VFP_REGNUM 94
+#define LAST_VFP_REGNUM (TARGET_VFP3 ? 126 : 94)
#define IS_VFP_REGNUM(REGNUM) \
(((REGNUM) >= FIRST_VFP_REGNUM) && ((REGNUM) <= LAST_VFP_REGNUM))
+/* VFP registers are split into two types: those defined by VFP versions < 3
+ have D registers overlaid on consecutive pairs of S registers. VFP version 3
+ defines 16 new D registers (d16-d31) which, for simplicity and correctness
+ in various parts of the backend, we implement as "fake" single-precision
+ registers (which would be S32-S63, but cannot be used in that way). The
+ following macros define these ranges of registers. */
+#define LAST_LO_VFP_REGNUM 94
+#define FIRST_HI_VFP_REGNUM 95
+
+#define VFP_REGNO_OK_FOR_SINGLE(REGNUM) \
+ ((REGNUM) <= LAST_LO_VFP_REGNUM)
+
+/* DFmode values are only valid in even register pairs. */
+#define VFP_REGNO_OK_FOR_DOUBLE(REGNUM) \
+ ((((REGNUM) - FIRST_VFP_REGNUM) & 1) == 0)
+
+/* Neon Quad values must start at a multiple of four registers. */
+#define NEON_REGNO_OK_FOR_QUAD(REGNUM) \
+ ((((REGNUM) - FIRST_VFP_REGNUM) & 3) == 0)
+
/* The number of hard registers is 16 ARM + 8 FPA + 1 CC + 1 SFP + 1 AFP. */
/* + 16 Cirrus registers take us up to 43. */
/* Intel Wireless MMX Technology registers add 16 + 4 more. */
-/* VFP adds 32 + 1 more. */
-#define FIRST_PSEUDO_REGISTER 96
+/* VFP (VFP3) adds 32 (64) + 1 more. */
+#define FIRST_PSEUDO_REGISTER 128
#define DBX_REGISTER_NUMBER(REGNO) arm_dbx_register_number (REGNO)
@@ -956,29 +1006,48 @@ extern int arm_structure_size_boundary;
#define VALID_IWMMXT_REG_MODE(MODE) \
(arm_vector_mode_supported_p (MODE) || (MODE) == DImode)
+/* Modes valid for Neon D registers. */
+#define VALID_NEON_DREG_MODE(MODE) \
+ ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
+ || (MODE) == V2SFmode || (MODE) == DImode)
+
+/* Modes valid for Neon Q registers. */
+#define VALID_NEON_QREG_MODE(MODE) \
+ ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \
+ || (MODE) == V4SFmode || (MODE) == TImode)
+
/* The order in which register should be allocated. It is good to use ip
since no saving is required (though calls clobber it) and it never contains
function parameters. It is quite good to use lr since other calls may
clobber it anyway. Allocate r0 through r3 in reverse order since r3 is
least likely to contain a function parameter; in addition results are
- returned in r0. */
-
-#define REG_ALLOC_ORDER \
-{ \
- 3, 2, 1, 0, 12, 14, 4, 5, \
- 6, 7, 8, 10, 9, 11, 13, 15, \
- 16, 17, 18, 19, 20, 21, 22, 23, \
- 27, 28, 29, 30, 31, 32, 33, 34, \
- 35, 36, 37, 38, 39, 40, 41, 42, \
- 43, 44, 45, 46, 47, 48, 49, 50, \
- 51, 52, 53, 54, 55, 56, 57, 58, \
- 59, 60, 61, 62, \
- 24, 25, 26, \
- 78, 77, 76, 75, 74, 73, 72, 71, \
- 70, 69, 68, 67, 66, 65, 64, 63, \
- 79, 80, 81, 82, 83, 84, 85, 86, \
- 87, 88, 89, 90, 91, 92, 93, 94, \
- 95 \
+ returned in r0.
+ For VFP/VFPv3, allocate caller-saved registers first (D0-D7), then D16-D31,
+ then D8-D15. The reason for doing this is to attempt to reduce register
+ pressure when both single- and double-precision registers are used in a
+ function, but hopefully not force double-precision registers to be
+ callee-saved when it's not necessary. */
+
+#define REG_ALLOC_ORDER \
+{ \
+ 3, 2, 1, 0, 12, 14, 4, 5, \
+ 6, 7, 8, 10, 9, 11, 13, 15, \
+ 16, 17, 18, 19, 20, 21, 22, 23, \
+ 27, 28, 29, 30, 31, 32, 33, 34, \
+ 35, 36, 37, 38, 39, 40, 41, 42, \
+ 43, 44, 45, 46, 47, 48, 49, 50, \
+ 51, 52, 53, 54, 55, 56, 57, 58, \
+ 59, 60, 61, 62, \
+ 24, 25, 26, \
+ 78, 77, 76, 75, 74, 73, 72, 71, \
+ 70, 69, 68, 67, 66, 65, 64, 63, \
+ 95, 96, 97, 98, 99, 100, 101, 102, \
+ 103, 104, 105, 106, 107, 108, 109, 110, \
+ 111, 112, 113, 114, 115, 116, 117, 118, \
+ 119, 120, 121, 122, 123, 124, 125, 126, \
+ 79, 80, 81, 82, 83, 84, 85, 86, \
+ 87, 88, 89, 90, 91, 92, 93, 94, \
+ 127 \
}
/* Interrupt functions can only use registers that have already been
@@ -997,6 +1066,8 @@ enum reg_class
NO_REGS,
FPA_REGS,
CIRRUS_REGS,
+ VFP_LO_REGS,
+ VFP_HI_REGS,
VFP_REGS,
IWMMXT_GR_REGS,
IWMMXT_REGS,
@@ -1019,6 +1090,8 @@ enum reg_class
"NO_REGS", \
"FPA_REGS", \
"CIRRUS_REGS", \
+ "VFP_LO_REGS", \
+ "VFP_HI_REGS", \
"VFP_REGS", \
"IWMMXT_GR_REGS", \
"IWMMXT_REGS", \
@@ -1035,24 +1108,30 @@ enum reg_class
/* Define which registers fit in which classes.
This is an initializer for a vector of HARD_REG_SET
of length N_REG_CLASSES. */
-#define REG_CLASS_CONTENTS \
-{ \
- { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
- { 0x00FF0000, 0x00000000, 0x00000000 }, /* FPA_REGS */ \
- { 0xF8000000, 0x000007FF, 0x00000000 }, /* CIRRUS_REGS */ \
- { 0x00000000, 0x80000000, 0x7FFFFFFF }, /* VFP_REGS */ \
- { 0x00000000, 0x00007800, 0x00000000 }, /* IWMMXT_GR_REGS */ \
- { 0x00000000, 0x7FFF8000, 0x00000000 }, /* IWMMXT_REGS */ \
- { 0x000000FF, 0x00000000, 0x00000000 }, /* LO_REGS */ \
- { 0x00002000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
- { 0x000020FF, 0x00000000, 0x00000000 }, /* BASE_REGS */ \
- { 0x0000FF00, 0x00000000, 0x00000000 }, /* HI_REGS */ \
- { 0x01000000, 0x00000000, 0x00000000 }, /* CC_REG */ \
- { 0x00000000, 0x00000000, 0x80000000 }, /* VFPCC_REG */ \
- { 0x0200FFFF, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \
- { 0xFAFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF } /* ALL_REGS */ \
+#define REG_CLASS_CONTENTS \
+{ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x00FF0000, 0x00000000, 0x00000000, 0x00000000 }, /* FPA_REGS */ \
+ { 0xF8000000, 0x000007FF, 0x00000000, 0x00000000 }, /* CIRRUS_REGS */ \
+ { 0x00000000, 0x80000000, 0x7FFFFFFF, 0x00000000 }, /* VFP_LO_REGS */ \
+ { 0x00000000, 0x00000000, 0x80000000, 0x7FFFFFFF }, /* VFP_HI_REGS */ \
+ { 0x00000000, 0x80000000, 0xFFFFFFFF, 0x7FFFFFFF }, /* VFP_REGS */ \
+ { 0x00000000, 0x00007800, 0x00000000, 0x00000000 }, /* IWMMXT_GR_REGS */ \
+ { 0x00000000, 0x7FFF8000, 0x00000000, 0x00000000 }, /* IWMMXT_REGS */ \
+ { 0x000000FF, 0x00000000, 0x00000000, 0x00000000 }, /* LO_REGS */ \
+ { 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
+ { 0x000020FF, 0x00000000, 0x00000000, 0x00000000 }, /* BASE_REGS */ \
+ { 0x0000FF00, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \
+ { 0x01000000, 0x00000000, 0x00000000, 0x00000000 }, /* CC_REG */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x80000000 }, /* VFPCC_REG */ \
+ { 0x0200FFFF, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \
+ { 0xFAFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF } /* ALL_REGS */ \
}
+/* Any of the VFP register classes. */
+#define IS_VFP_CLASS(X) \
+ ((X) == VFP_LO_REGS || (X) == VFP_HI_REGS || (X) == VFP_REGS)
+
/* The same information, inverted:
Return the class number of the smallest class containing
reg number REGNO. This could be a conditional expression
@@ -1098,11 +1177,17 @@ enum reg_class
/* Get reg_class from a letter such as appears in the machine description.
We only need constraint `f' for FPA_REGS (`r' == GENERAL_REGS) for the
- ARM, but several more letters for the Thumb. */
+ ARM, but several more letters for the Thumb.
+ For VFP, use 't' for things which can only go in single-precision registers,
+ else use 'w'. We use this distinction because VFP3 D registers are a
+ superset of the S registers, rather than simply being overlaid on top of
+ them. FIXME: This distinction may also be useful for VFP variants without
+ double-precision operations. */
#define REG_CLASS_FROM_LETTER(C) \
( (C) == 'f' ? FPA_REGS \
: (C) == 'v' ? CIRRUS_REGS \
- : (C) == 'w' ? VFP_REGS \
+ : (C) == 'w' ? (TARGET_VFP3 ? VFP_REGS : VFP_LO_REGS) \
+ : (C) == 't' ? VFP_LO_REGS \
: (C) == 'y' ? IWMMXT_REGS \
: (C) == 'z' ? IWMMXT_GR_REGS \
: (C) == 'l' ? (TARGET_ARM ? GENERAL_REGS : LO_REGS) \
@@ -1180,9 +1265,15 @@ enum reg_class
'Da' is a constant that takes two ARM insns to load.
'Db' takes three ARM insns.
'Dc' takes four ARM insns, if we allow that in this compilation.
+ 'Dn' is a constant which can be loaded with Neon VMOV immediate insns.
+ 'Dl' is a constant which can be used with Neon VORR/VBIC insns.
+ 'DL' is a constant which can be used with Neon VORN/VAND pseudo-insns.
+ 'Dv' is a constant which can be loaded with VFPv3 fconst[sd] insns.
'U' Prefixes an extended memory constraint where:
'Uv' is an address valid for VFP load/store insns.
'Uy' is an address valid for iwmmxt load/store insns.
+ 'Un' is an address valid for Neon VLD1/VST1 insns.
+ 'Us' is an address valid for ("simple") quad-word loads/stores from ARM regs.
'Uq' is an address valid for ldrsb. */
#define EXTRA_CONSTRAINT_STR_ARM(OP, C, STR) \
@@ -1195,7 +1286,21 @@ enum reg_class
&& arm_const_double_inline_cost (OP) == 3) \
|| ((STR)[1] == 'c' \
&& arm_const_double_inline_cost (OP) == 4 \
- && !(optimize_size || arm_ld_sched)))) : \
+ && !(optimize_size || arm_ld_sched)) \
+ || ((STR)[1] == 'n' \
+ && GET_CODE (OP) == CONST_VECTOR \
+ && imm_for_neon_mov_operand ((OP), GET_MODE (OP))) \
+ || ((STR)[1] == 'l' \
+ && GET_CODE (OP) == CONST_VECTOR \
+ && imm_for_neon_logic_operand ((OP), \
+ GET_MODE (OP))) \
+ || ((STR)[1] == 'L' \
+ && GET_CODE (OP) == CONST_VECTOR \
+ && imm_for_neon_inv_logic_operand ((OP), \
+ GET_MODE (OP))) \
+ || ((STR)[1] == 'v' \
+ && GET_CODE (OP) == CONST_DOUBLE \
+ && vfp3_const_double_rtx (OP)))) : \
((C) == 'Q') ? (GET_CODE (OP) == MEM \
&& GET_CODE (XEXP (OP, 0)) == REG) : \
((C) == 'R') ? (GET_CODE (OP) == MEM \
@@ -1205,6 +1310,8 @@ enum reg_class
((C) == 'T') ? cirrus_memory_offset (OP) : \
((C) == 'U' && (STR)[1] == 'v') ? arm_coproc_mem_operand (OP, FALSE) : \
((C) == 'U' && (STR)[1] == 'y') ? arm_coproc_mem_operand (OP, TRUE) : \
+ ((C) == 'U' && (STR)[1] == 'n') ? neon_vector_mem_operand (OP, FALSE) : \
+ ((C) == 'U' && (STR)[1] == 's') ? neon_vector_mem_operand (OP, TRUE) : \
((C) == 'U' && (STR)[1] == 'q') \
? arm_extendqisi_mem_op (OP, GET_MODE (OP)) \
: 0)
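+/* Illustration: the Neon move patterns in neon.md pair the 'Dn' constraint
+   with the imm_for_neon_mov_operand predicate, so that, for example, an
+   all-zeros CONST_VECTOR can be emitted as a single VMOV-immediate rather
+   than loaded from the constant pool.  */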
@@ -1249,22 +1356,24 @@ enum reg_class
/* Return the register class of a scratch register needed to copy IN into
or out of a register in CLASS in MODE. If it can be done directly,
- NO_REGS is returned. */
+ NO_REGS is returned.
+ FIXME: The IS_VFP_CLASS part may need attention. */
#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
/* Restrict which direct reloads are allowed for VFP regs. */ \
((TARGET_VFP && TARGET_HARD_FLOAT \
- && (CLASS) == VFP_REGS) \
+ && IS_VFP_CLASS (CLASS)) \
? vfp_secondary_reload_class (MODE, X) \
: TARGET_32BIT \
? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \
? GENERAL_REGS : NO_REGS) \
: THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X))
-/* If we need to load shorts byte-at-a-time, then we need a scratch. */
+/* If we need to load shorts byte-at-a-time, then we need a scratch.
+ FIXME: As above. */
#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
/* Restrict which direct reloads are allowed for VFP regs. */ \
((TARGET_VFP && TARGET_HARD_FLOAT \
- && (CLASS) == VFP_REGS) \
+ && IS_VFP_CLASS (CLASS)) \
? vfp_secondary_reload_class (MODE, X) : \
/* Cannot load constants into Cirrus registers. */ \
(TARGET_MAVERICK && TARGET_HARD_FLOAT \
@@ -1378,11 +1487,11 @@ do { \
/* Moves between FPA_REGS and GENERAL_REGS are two memory insns. */
#define REGISTER_MOVE_COST(MODE, FROM, TO) \
- (TARGET_32BIT ? \
+ (TARGET_32BIT ? \
((FROM) == FPA_REGS && (TO) != FPA_REGS ? 20 : \
(FROM) != FPA_REGS && (TO) == FPA_REGS ? 20 : \
- (FROM) == VFP_REGS && (TO) != VFP_REGS ? 10 : \
- (FROM) != VFP_REGS && (TO) == VFP_REGS ? 10 : \
+ IS_VFP_CLASS (FROM) && !IS_VFP_CLASS (TO) ? 10 : \
+ !IS_VFP_CLASS (FROM) && IS_VFP_CLASS (TO) ? 10 : \
(FROM) == IWMMXT_REGS && (TO) != IWMMXT_REGS ? 4 : \
(FROM) != IWMMXT_REGS && (TO) == IWMMXT_REGS ? 4 : \
(FROM) == IWMMXT_GR_REGS || (TO) == IWMMXT_GR_REGS ? 20 : \
@@ -1645,7 +1754,8 @@ typedef struct
#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
(CUM).nargs += 1; \
if (arm_vector_mode_supported_p (MODE) \
- && (CUM).named_count > (CUM).nargs) \
+ && (CUM).named_count > (CUM).nargs \
+ && TARGET_IWMMXT) \
(CUM).iwmmxt_nregs += 1; \
else \
(CUM).nregs += ARM_NUM_REGS2 (MODE, TYPE)
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 9e2f53ec6bc..371d5819c4f 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -51,6 +51,7 @@
;; UNSPEC Usage:
;; Note: sin and cos are no-longer used.
+;; Unspec constants for Neon are defined in neon.md.
(define_constants
[(UNSPEC_SIN 0) ; `sin' operation (MODE_FLOAT):
@@ -119,12 +120,14 @@
; a 32-bit object.
(VUNSPEC_POOL_8 7) ; `pool-entry(8)'. An entry in the constant pool for
; a 64-bit object.
- (VUNSPEC_TMRC 8) ; Used by the iWMMXt TMRC instruction.
- (VUNSPEC_TMCR 9) ; Used by the iWMMXt TMCR instruction.
- (VUNSPEC_ALIGN8 10) ; 8-byte alignment version of VUNSPEC_ALIGN
- (VUNSPEC_WCMP_EQ 11) ; Used by the iWMMXt WCMPEQ instructions
- (VUNSPEC_WCMP_GTU 12) ; Used by the iWMMXt WCMPGTU instructions
- (VUNSPEC_WCMP_GT 13) ; Used by the iwMMXT WCMPGT instructions
+ (VUNSPEC_POOL_16 8) ; `pool-entry(16)'. An entry in the constant pool for
+ ; a 128-bit object.
+ (VUNSPEC_TMRC 9) ; Used by the iWMMXt TMRC instruction.
+ (VUNSPEC_TMCR 10) ; Used by the iWMMXt TMCR instruction.
+ (VUNSPEC_ALIGN8 11) ; 8-byte alignment version of VUNSPEC_ALIGN
+ (VUNSPEC_WCMP_EQ 12) ; Used by the iWMMXt WCMPEQ instructions
+ (VUNSPEC_WCMP_GTU 13) ; Used by the iWMMXt WCMPGTU instructions
+ (VUNSPEC_WCMP_GT 14) ; Used by the iWMMXt WCMPGT instructions
(VUNSPEC_EH_RETURN 20); Use to override the return address for exception
; handling.
]
@@ -5478,27 +5481,6 @@
"
)
-;; Vector Moves
-(define_expand "movv2si"
- [(set (match_operand:V2SI 0 "nonimmediate_operand" "")
- (match_operand:V2SI 1 "general_operand" ""))]
- "TARGET_REALLY_IWMMXT"
-{
-})
-
-(define_expand "movv4hi"
- [(set (match_operand:V4HI 0 "nonimmediate_operand" "")
- (match_operand:V4HI 1 "general_operand" ""))]
- "TARGET_REALLY_IWMMXT"
-{
-})
-
-(define_expand "movv8qi"
- [(set (match_operand:V8QI 0 "nonimmediate_operand" "")
- (match_operand:V8QI 1 "general_operand" ""))]
- "TARGET_REALLY_IWMMXT"
-{
-})
;; load- and store-multiple insns
@@ -10182,6 +10164,30 @@
[(set_attr "length" "8")]
)
+(define_insn "consttable_16"
+ [(unspec_volatile [(match_operand 0 "" "")] VUNSPEC_POOL_16)]
+ "TARGET_EITHER"
+ "*
+ {
+ making_const_table = TRUE;
+ switch (GET_MODE_CLASS (GET_MODE (operands[0])))
+ {
+ case MODE_FLOAT:
+ {
+ REAL_VALUE_TYPE r;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]);
+ assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD);
+ break;
+ }
+ default:
+ assemble_integer (operands[0], 16, BITS_PER_WORD, 1);
+ break;
+ }
+ return \"\";
+ }"
+ [(set_attr "length" "16")]
+)
+
;; Miscellaneous Thumb patterns
(define_expand "tablejump"
@@ -10357,10 +10363,14 @@
(include "fpa.md")
;; Load the Maverick co-processor patterns
(include "cirrus.md")
+;; Vector bits common to IWMMXT and Neon
+(include "vec-common.md")
;; Load the Intel Wireless Multimedia Extension patterns
(include "iwmmxt.md")
;; Load the VFP co-processor patterns
(include "vfp.md")
;; Thumb-2 patterns
(include "thumb2.md")
+;; Neon patterns
+(include "neon.md")
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index ac497d96186..837b1a14982 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -153,3 +153,7 @@ Tune code for the given processor
mwords-little-endian
Target Report RejectNegative Mask(LITTLE_WORDS)
Assume big endian bytes, little endian words
+
+mvectorize-with-neon-quad
+Target Report Mask(NEON_VECTORIZE_QUAD)
+Use Neon quad-word (rather than double-word) registers for vectorization
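+
+; Illustrative use (the other flags are assumptions about the local
+; configuration, not part of this patch):
+;   gcc -O2 -ftree-vectorize -mfpu=neon -mvectorize-with-neon-quad foo.c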
diff --git a/gcc/config/arm/iwmmxt.md b/gcc/config/arm/iwmmxt.md
index 9436b75ed1d..32f2e8eb3cc 100644
--- a/gcc/config/arm/iwmmxt.md
+++ b/gcc/config/arm/iwmmxt.md
@@ -20,6 +20,15 @@
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.
+;; Integer element sizes implemented by IWMMXT.
+(define_mode_macro VMMX [V2SI V4HI V8QI])
+
+;; Integer element sizes for shifts.
+(define_mode_macro VSHFT [V4HI V2SI DI])
+
+;; Determine element size suffix from vector mode.
+(define_mode_attr MMX_char [(V8QI "b") (V4HI "h") (V2SI "w") (DI "d")])
+
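+;; For example, instantiating "*add<mode>3_iwmmxt" (below) with V4HImode
+;; picks MMX_char "h", so "wadd<MMX_char>" emits "waddh" -- exactly what the
+;; separate addv4hi3 pattern it replaces produced.
+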
(define_insn "iwmmxt_iordi3"
[(set (match_operand:DI 0 "register_operand" "=y,?&r,?&r")
(ior:DI (match_operand:DI 1 "register_operand" "%y,0,r")
@@ -236,28 +245,12 @@
;; Vector add/subtract
-(define_insn "addv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (plus:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "addv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (plus:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "addv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
+(define_insn "*add<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (plus:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
"TARGET_REALLY_IWMMXT"
- "waddw%?\\t%0, %1, %2"
+ "wadd<MMX_char>%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
(define_insn "ssaddv8qi3"
@@ -308,28 +301,12 @@
"waddwus%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
-(define_insn "subv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (minus:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
+(define_insn "*sub<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (minus:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
"TARGET_REALLY_IWMMXT"
- "wsubb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "subv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (minus:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "subv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (minus:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubw%?\\t%0, %1, %2"
+ "wsub<MMX_char>%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
(define_insn "sssubv8qi3"
@@ -380,7 +357,7 @@
"wsubwus%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
-(define_insn "mulv4hi3"
+(define_insn "*mulv4hi3_iwmmxt"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(mult:V4HI (match_operand:V4HI 1 "register_operand" "y")
(match_operand:V4HI 2 "register_operand" "y")))]
@@ -731,100 +708,36 @@
;; Max/min insns
-(define_insn "smaxv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (smax:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
+(define_insn "*smax<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (smax:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
"TARGET_REALLY_IWMMXT"
- "wmaxsb%?\\t%0, %1, %2"
+ "wmaxs<MMX_char>%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
-(define_insn "umaxv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (umax:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
+(define_insn "*umax<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (umax:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
"TARGET_REALLY_IWMMXT"
- "wmaxub%?\\t%0, %1, %2"
+ "wmaxu<MMX_char>%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
-(define_insn "smaxv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (smax:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmaxsh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "umaxv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (umax:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmaxuh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "smaxv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (smax:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmaxsw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "umaxv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (umax:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmaxuw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "sminv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (smin:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wminsb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "uminv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (umin:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wminub%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "sminv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (smin:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wminsh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "uminv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (umin:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
+(define_insn "*smin<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (smin:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
"TARGET_REALLY_IWMMXT"
- "wminuh%?\\t%0, %1, %2"
+ "wmins<MMX_char>%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
-(define_insn "sminv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (smin:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
+(define_insn "*umin<mode>3_iwmmxt"
+ [(set (match_operand:VMMX 0 "register_operand" "=y")
+ (umin:VMMX (match_operand:VMMX 1 "register_operand" "y")
+ (match_operand:VMMX 2 "register_operand" "y")))]
"TARGET_REALLY_IWMMXT"
- "wminsw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "uminv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (umin:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wminuw%?\\t%0, %1, %2"
+ "wminu<MMX_char>%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
;; Pack/unpack insns.
@@ -1138,76 +1051,28 @@
"wrordg%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
-(define_insn "ashrv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:SI 2 "register_operand" "z")))]
- "TARGET_REALLY_IWMMXT"
- "wsrahg%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "ashrv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:SI 2 "register_operand" "z")))]
- "TARGET_REALLY_IWMMXT"
- "wsrawg%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "ashrdi3_iwmmxt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (ashiftrt:DI (match_operand:DI 1 "register_operand" "y")
- (match_operand:SI 2 "register_operand" "z")))]
- "TARGET_REALLY_IWMMXT"
- "wsradg%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "lshrv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:SI 2 "register_operand" "z")))]
- "TARGET_REALLY_IWMMXT"
- "wsrlhg%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "lshrv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:SI 2 "register_operand" "z")))]
- "TARGET_REALLY_IWMMXT"
- "wsrlwg%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "lshrdi3_iwmmxt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (lshiftrt:DI (match_operand:DI 1 "register_operand" "y")
- (match_operand:SI 2 "register_operand" "z")))]
+(define_insn "ashr<mode>3_iwmmxt"
+ [(set (match_operand:VSHFT 0 "register_operand" "=y")
+ (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y")
+ (match_operand:SI 2 "register_operand" "z")))]
"TARGET_REALLY_IWMMXT"
- "wsrldg%?\\t%0, %1, %2"
+ "wsra<MMX_char>g%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
-(define_insn "ashlv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (ashift:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:SI 2 "register_operand" "z")))]
+(define_insn "lshr<mode>3_iwmmxt"
+ [(set (match_operand:VSHFT 0 "register_operand" "=y")
+ (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y")
+ (match_operand:SI 2 "register_operand" "z")))]
"TARGET_REALLY_IWMMXT"
- "wsllhg%?\\t%0, %1, %2"
+ "wsrl<MMX_char>g%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
-(define_insn "ashlv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (ashift:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:SI 2 "register_operand" "z")))]
- "TARGET_REALLY_IWMMXT"
- "wsllwg%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
-
-(define_insn "ashldi3_iwmmxt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (ashift:DI (match_operand:DI 1 "register_operand" "y")
- (match_operand:SI 2 "register_operand" "z")))]
+(define_insn "ashl<mode>3_iwmmxt"
+ [(set (match_operand:VSHFT 0 "register_operand" "=y")
+ (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y")
+ (match_operand:SI 2 "register_operand" "z")))]
"TARGET_REALLY_IWMMXT"
- "wslldg%?\\t%0, %1, %2"
+ "wsll<MMX_char>g%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")])
(define_insn "rorv4hi3_di"
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
new file mode 100644
index 00000000000..ab4c88a6854
--- /dev/null
+++ b/gcc/config/arm/neon.md
@@ -0,0 +1,688 @@
+;; ARM NEON coprocessor Machine Description
+;; Copyright (C) 2006 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to the Free
+;; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+;; 02110-1301, USA.
+
+;; Constants for unspecs.
+(define_constants
+ [(UNSPEC_VEXT 64)
+ (UNSPEC_VPADD 65)
+ (UNSPEC_VPSMIN 66)
+ (UNSPEC_VPUMIN 67)
+ (UNSPEC_VPSMAX 68)
+ (UNSPEC_VPUMAX 69)
+ (UNSPEC_ASHIFT_SIGNED 70)
+ (UNSPEC_ASHIFT_UNSIGNED 71)])
+
+;; Double-width vector modes.
+(define_mode_macro VD [V8QI V4HI V2SI V2SF])
+
+;; Same, without floating-point elements.
+(define_mode_macro VDI [V8QI V4HI V2SI])
+
+;; Quad-width vector modes.
+(define_mode_macro VQ [V16QI V8HI V4SI V4SF])
+
+;; Same, without floating-point elements.
+(define_mode_macro VQI [V16QI V8HI V4SI])
+
+;; Widenable modes.
+(define_mode_macro VW [V8QI V4HI V2SI])
+
+;; Narrowable modes.
+(define_mode_macro VN [V8HI V4SI V2DI])
+
+;; All supported vector modes.
+(define_mode_macro VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
+
+;; Supported integer vector modes.
+(define_mode_macro VDQI [V8QI V16QI V4HI V8HI V2SI V4SI])
+
+;; Define element mode for each vector mode.
+(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
+ (V4HI "HI") (V8HI "HI")
+ (V2SI "SI") (V4SI "SI")
+ (V2SF "SF") (V4SF "SF")])
+
+;; Register-width print modifier, from the vector mode: 'P' selects a
+;; doubleword register name, 'q' a quadword one.
+(define_mode_attr V_reg [(V8QI "P") (V16QI "q")
+ (V4HI "P") (V8HI "q")
+ (V2SI "P") (V4SI "q")
+ (V2SF "P") (V4SF "q")])
+
+;; Wider modes with the same number of elements.
+(define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")])
+
+;; Narrower modes with the same number of elements.
+(define_mode_attr V_narrow [(V8HI "V8QI") (V4SI "V4HI") (V2DI "V2SI")])
+
+;; Get the element type suffix from a vector mode, for operations where we
+;; don't care about signedness.
+(define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8")
+ (V4HI "i16") (V8HI "i16")
+ (V2SI "i32") (V4SI "i32")
+ (V2SF "f32") (V4SF "f32")])
+
+;; Same, but for operations which work on signed values.
+(define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8")
+ (V4HI "s16") (V8HI "s16")
+ (V2SI "s32") (V4SI "s32")
+ (V2SF "f32") (V4SF "f32")])
+
+;; Same, but for operations which work on unsigned values.
+(define_mode_attr V_u_elem [(V8QI "u8") (V16QI "u8")
+ (V4HI "u16") (V8HI "u16")
+ (V2SI "u32") (V4SI "u32")])
+
+;; Element types for extraction of unsigned scalars.  (32-bit elements fill
+;; the destination core register, so they need no signedness suffix.)
+(define_mode_attr VD_uf_sclr [(V8QI "u8") (V4HI "u16") (V2SI "32") (V2SF "32")])
+
+;; Element sizes for duplicating ARM registers to all elements of a vector.
+(define_mode_attr VD_dup [(V8QI "8") (V4HI "16") (V2SI "32") (V2SF "32")])
+
+;; FIXME: The insn attributes below are probably incorrect.
+(define_insn "*neon_mov<mode>"
+ [(set (match_operand:VD 0 "nonimmediate_operand"
+ "=w,Uv,w, w, ?r,?w,?r,?r, ?Us")
+ (match_operand:VD 1 "general_operand"
+ " w,w, Dn,Uvi, w, r, r, Usi,r"))]
+ "TARGET_NEON"
+{
+ if (which_alternative == 2)
+ {
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
+ &operands[1], &width);
+
+ gcc_assert (is_valid != 0);
+
+ if (width == 0)
+ return "vmov.f32\t%P0, %1 @ <mode>";
+ else
+ sprintf (templ, "vmov.i%d\t%%P0, %%1 @ <mode>", width);
+
+ return templ;
+ }
+
+ /* FIXME: If the memory layout is changed in big-endian mode, output_move_vfp
+ below must be changed to output_move_neon (which will use the
+ element/structure loads/stores), and the constraint changed to 'Un' instead
+ of 'Uv'. */
+
+ switch (which_alternative)
+ {
+ case 0: return "vmov\t%P0, %P1 @ <mode>";
+ case 1: case 3: return output_move_vfp (operands);
+ case 2: gcc_unreachable ();
+ case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
+ case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
+ default: return output_move_double (operands);
+ }
+}
+ [(set_attr "type" "farith,f_stored,farith,f_loadd,f_2_r,r_2_f,*,load2,store2")
+ (set_attr "length" "4,4,4,4,4,4,8,8,8")
+ (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")
+ (set_attr "predicable" "no")])
+
+(define_insn "*neon_mov<mode>"
+ [(set (match_operand:VQ 0 "nonimmediate_operand"
+ "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
+ (match_operand:VQ 1 "general_operand"
+ " w,w, Dn,Uni, w, r, r, Usi, r"))]
+ "TARGET_NEON"
+{
+ if (which_alternative == 2)
+ {
+ int width, is_valid;
+ static char templ[40];
+
+ is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
+ &operands[1], &width);
+
+ gcc_assert (is_valid != 0);
+
+ if (width == 0)
+ return "vmov.f32\t%q0, %1 @ <mode>";
+ else
+ sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
+
+ return templ;
+ }
+
+ switch (which_alternative)
+ {
+ case 0: return "vmov\t%q0, %q1 @ <mode>";
+ case 1: case 3: return output_move_neon (operands);
+ case 2: gcc_unreachable ();
+ case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
+ case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
+ default: return output_move_quad (operands);
+ }
+}
+ [(set_attr "type" "farith,f_stored,farith,f_loadd,f_2_r,r_2_f,*,load2,store2")
+ (set_attr "length" "4,8,4,8,8,8,16,8,16")
+ (set_attr "pool_range" "*,*,*,1020,*,*,*,1020,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,*,*,1008,*")
+ (set_attr "predicable" "no")])
+
+; FIXME: Implement vec_set/vec_extract/vec_init for quad-word modes.
+
+(define_insn "vec_set<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "+w")
+ (vec_merge:VD
+ (match_operand:VD 1 "s_register_operand" "0")
+ (vec_duplicate:VD
+ (match_operand:<V_elem> 2 "s_register_operand" "r"))
+ (ashift:SI (const_int 1)
+ (match_operand:SI 3 "immediate_operand" "i"))))]
+ "TARGET_NEON"
+ "vmov.<VD_uf_sclr>\t%P0[%n3], %2"
+ [(set_attr "predicable" "yes")])
+
+(define_insn "vec_extract<mode>"
+ [(set (match_operand:<V_elem> 0 "s_register_operand" "=r")
+ (vec_select:<V_elem>
+ (match_operand:VD 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+ "TARGET_NEON"
+ "vmov.<VD_uf_sclr>\t%0, %P1[%n2]"
+ [(set_attr "predicable" "yes")])
+
+(define_expand "vec_init<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_NEON"
+{
+ neon_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+;; Doubleword and quadword arithmetic.
+
+;; NOTE: vadd/vsub and some other instructions also support 64-bit integer
+;; element size, which we could potentially use for "long long" operations. We
+;; don't want to do this at present though, because moving values from the
+;; vector unit to the ARM core is currently slow and 64-bit addition (etc.) is
+;; easy to do with ARM instructions anyway.
+
+(define_insn "*add<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
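+
+;; For example, with <mode> = V4SI the attributes above give
+;; <V_if_elem> = "i32" and <V_reg> = "q", so this pattern emits
+;; "vadd.i32 q0, q1, q2" (register numbers illustrative).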
+
+(define_insn "*sub<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*mul<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
+ (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
+ (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
+ case 1: return neon_output_logic_immediate ("vorr", &operands[2],
+ <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "predicable" "no")])
+
+;; The concrete forms of the Neon immediate-logic instructions are vbic and
+;; vorr. We support the pseudo-instruction vand instead, because that
+;; corresponds to the canonical form the middle-end expects to use for
+;; immediate bitwise-ANDs.
+
+(define_insn "and<mode>3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
+ (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
+ (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
+ case 1: return neon_output_logic_immediate ("vand", &operands[2],
+ <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "predicable" "no")])
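+
+;; Illustration (assembler behaviour assumed, not verified here): an
+;; immediate AND such as "vand.i32 d0, #0xffffff00" is the pseudo form;
+;; gas rewrites it as the concrete "vbic.i32 d0, #0x000000ff" by
+;; complementing the immediate.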
+
+(define_insn "*orn<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON"
+ "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*bic<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))))]
+ "TARGET_NEON"
+ "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmvn\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "predicable" "no")])
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (abs:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (neg:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*umin<mode>3_neon"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (umin:VDQI (match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*umax<mode>3_neon"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (umax:VDQI (match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*smin<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (smin:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*smax<mode>3_neon"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (smax:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
+ (match_operand:VDQ 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "ashl<mode>3"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (ashift:VDQI (match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+; Used for implementing arithmetic shift-right, which is a left-shift by a
+; negative amount, with signed operands.  This is essentially the same as
+; ashl<mode>3 above, but using an unspec in case GCC tries anything tricky
+; with negative shift amounts.
+
+(define_insn "ashl<mode>3_signed"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")]
+ UNSPEC_ASHIFT_SIGNED))]
+ "TARGET_NEON"
+ "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+; Used for implementing logical shift-right, which is a left-shift by a negative
+; amount, with unsigned operands.
+
+(define_insn "ashl<mode>3_unsigned"
+ [(set (match_operand:VDQI 0 "s_register_operand" "=w")
+ (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
+ (match_operand:VDQI 2 "s_register_operand" "w")]
+ UNSPEC_ASHIFT_UNSIGNED))]
+ "TARGET_NEON"
+ "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "predicable" "no")])
+
+(define_expand "ashr<mode>3"
+ [(set (match_operand:VDQI 0 "s_register_operand" "")
+ (ashiftrt:VDQI (match_operand:VDQI 1 "s_register_operand" "")
+ (match_operand:VDQI 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
+
+ DONE;
+})
+
+(define_expand "lshr<mode>3"
+ [(set (match_operand:VDQI 0 "s_register_operand" "")
+ (lshiftrt:VDQI (match_operand:VDQI 1 "s_register_operand" "")
+ (match_operand:VDQI 2 "s_register_operand" "")))]
+ "TARGET_NEON"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
+
+ DONE;
+})
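+
+; For example (register numbers illustrative), ashrv2si3 for d0 = d1 >> d2
+; expands to roughly:
+;   vneg.s32 d3, d2        @ negate the shift counts
+;   vshl.s32 d0, d1, d3    @ a left shift by a negative amount shifts right
+; and lshrv2si3 is the same but with vshl.u32.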
+
+;; Widening operations
+
+;; FIXME: I'm not sure if sign/zero_extend are legal to use on vector modes.
+
+(define_insn "widen_ssum<mode>3"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (plus:<V_widen> (sign_extend:<V_widen>
+ (match_operand:VW 1 "s_register_operand" "%w"))
+ (match_operand:<V_widen> 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vaddw.<V_s_elem>\t%q0, %q2, %P1"
+ [(set_attr "predicable" "no")])
+
+(define_insn "widen_usum<mode>3"
+ [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
+ (plus:<V_widen> (zero_extend:<V_widen>
+ (match_operand:VW 1 "s_register_operand" "%w"))
+ (match_operand:<V_widen> 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vaddw.<V_u_elem>\t%q0, %q2, %P1"
+ [(set_attr "predicable" "no")])
+
+;; VEXT can be used to synthesize coarse whole-vector shifts with 8-bit
+;; shift-count granularity. That's good enough for the middle-end's current
+;; needs.
+
+(define_expand "vec_shr_<mode>"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand:VDQ 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_multiple_of_8_operand" "")]
+ "TARGET_NEON"
+{
+ rtx zero_reg;
+ HOST_WIDE_INT num_bits = INTVAL (operands[2]);
+ const int width = GET_MODE_BITSIZE (<MODE>mode);
+ const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
+ rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
+ (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
+
+ if (num_bits == width)
+ {
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+ }
+
+ zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
+ operands[0] = gen_lowpart (bvecmode, operands[0]);
+ operands[1] = gen_lowpart (bvecmode, operands[1]);
+
+ emit_insn (gen_ext (operands[0], operands[1], zero_reg,
+ GEN_INT (num_bits / BITS_PER_UNIT)));
+ DONE;
+})
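+
+;; For example, vec_shr of a V8QI value by 16 bits becomes, with d2 assumed
+;; to hold zero, "vext.8 d0, d1, d2, #2": the low six bytes of d0 come from
+;; the top six bytes of d1 and the remaining two bytes are zero.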
+
+(define_expand "vec_shl_<mode>"
+ [(match_operand:VDQ 0 "s_register_operand" "")
+ (match_operand:VDQ 1 "s_register_operand" "")
+ (match_operand:SI 2 "const_multiple_of_8_operand" "")]
+ "TARGET_NEON"
+{
+ rtx zero_reg;
+ HOST_WIDE_INT num_bits = INTVAL (operands[2]);
+ const int width = GET_MODE_BITSIZE (<MODE>mode);
+ const enum machine_mode bvecmode = (width == 128) ? V16QImode : V8QImode;
+ rtx (*gen_ext) (rtx, rtx, rtx, rtx) =
+ (width == 128) ? gen_neon_vextv16qi : gen_neon_vextv8qi;
+
+ if (num_bits == 0)
+ {
+ emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
+ DONE;
+ }
+
+ num_bits = width - num_bits;
+
+ zero_reg = force_reg (bvecmode, CONST0_RTX (bvecmode));
+ operands[0] = gen_lowpart (bvecmode, operands[0]);
+ operands[1] = gen_lowpart (bvecmode, operands[1]);
+
+ emit_insn (gen_ext (operands[0], zero_reg, operands[1],
+ GEN_INT (num_bits / BITS_PER_UNIT)));
+ DONE;
+})
+
+(define_insn "neon_vextv8qi"
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
+ (match_operand:V8QI 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VEXT))]
+ "TARGET_NEON"
+ "vext.8\t%P0, %P1, %P2, %3"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neon_vextv16qi"
+ [(set (match_operand:V16QI 0 "s_register_operand" "=w")
+ (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
+ (match_operand:V16QI 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VEXT))]
+ "TARGET_NEON"
+ "vext.8\t%q0, %q1, %q2, %3"
+ [(set_attr "predicable" "no")])
+
+;; Reduction operations
+
+; Pairwise addition has wraparound semantics, so the same insn computes the
+; signed and the unsigned sum: we don't need to define reduc_splus_<mode> as
+; well as reduc_uplus_<mode>.
+
+(define_expand "reduc_uplus_<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpadd<mode>);
+ DONE;
+})
+
+(define_expand "reduc_smin_<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpsmin<mode>);
+ DONE;
+})
+
+(define_expand "reduc_smax_<mode>"
+ [(match_operand:VD 0 "s_register_operand" "")
+ (match_operand:VD 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpsmax<mode>);
+ DONE;
+})
+
+(define_expand "reduc_umin_<mode>"
+ [(match_operand:VDI 0 "s_register_operand" "")
+ (match_operand:VDI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpumin<mode>);
+ DONE;
+})
+
+(define_expand "reduc_umax_<mode>"
+ [(match_operand:VDI 0 "s_register_operand" "")
+ (match_operand:VDI 1 "s_register_operand" "")]
+ "TARGET_NEON"
+{
+ neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
+ &gen_neon_vpumax<mode>);
+ DONE;
+})
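+
+; A sketch of the expected expansion (neon_pairwise_reduce itself lives in
+; arm.c, not in this file): reducing a V4HI vector halves the number of live
+; elements per step, e.g.
+;   vpadd.i16 d0, d1, d1
+;   vpadd.i16 d0, d0, d0    @ lane 0 now holds the sum of all four elements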
+
+(define_insn "neon_vpadd<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPADD))]
+ "TARGET_NEON"
+ "vpadd.<V_if_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neon_vpsmin<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPSMIN))]
+ "TARGET_NEON"
+ "vpmin.<V_s_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neon_vpsmax<mode>"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")]
+ UNSPEC_VPSMAX))]
+ "TARGET_NEON"
+ "vpmax.<V_s_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neon_vpumin<mode>"
+ [(set (match_operand:VDI 0 "s_register_operand" "=w")
+ (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")]
+ UNSPEC_VPUMIN))]
+ "TARGET_NEON"
+ "vpmin.<V_u_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "neon_vpumax<mode>"
+ [(set (match_operand:VDI 0 "s_register_operand" "=w")
+ (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
+ (match_operand:VDI 2 "s_register_operand" "w")]
+ UNSPEC_VPUMAX))]
+ "TARGET_NEON"
+ "vpmax.<V_u_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+;; Saturating arithmetic
+
+; NOTE: Neon supports many more saturating variants of instructions than the
+; following, but these are all that GCC currently understands.
+; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
+; yet either, although these patterns may be used by intrinsics when they're
+; added.
+
+(define_insn "*ss_add<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqadd.<V_s_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*us_add<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqadd.<V_u_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*ss_sub<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqsub.<V_s_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+(define_insn "*us_sub<mode>_neon"
+ [(set (match_operand:VD 0 "s_register_operand" "=w")
+ (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
+ (match_operand:VD 2 "s_register_operand" "w")))]
+ "TARGET_NEON"
+ "vqsub.<V_u_elem>\t%P0, %P1, %P2"
+ [(set_attr "predicable" "no")])
+
+; FIXME: The ss_neg and ss_ashift RTL codes aren't supported in GCC 4.1, but
+; are documented for the current trunk.  Uncomment these patterns when this
+; code is merged to a GCC version which supports them.
+
+;(define_insn "*ss_neg<mode>_neon"
+; [(set (match_operand:VD 0 "s_register_operand" "=w")
+; (ss_neg:VD (match_operand:VD 1 "s_register_operand" "w")))]
+; "TARGET_NEON"
+; "vqneg.<V_s_elem>\t%P0, %P1"
+; [(set_attr "predicable" "no")])
+
+;(define_insn "*ss_ashift<mode>_neon"
+; [(set (match_operand:VD 0 "s_register_operand" "=w")
+; (ss_ashift:VD (match_operand:VD 1 "s_register_operand" "w")
+; (match_operand:VD 2 "s_register_operand" "w")))]
+; "TARGET_NEON"
+; "vqshl.<V_s_elem>\t%P0, %P1, %P2"
+; [(set_attr "predicable" "no")])
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 1918cf7a483..15ba8635078 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -470,3 +470,37 @@
(match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 64")))
+;; Neon predicates
+
+(define_predicate "const_multiple_of_8_operand"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT val = INTVAL (op);
+ return (val & 7) == 0;
+})
+
+(define_predicate "imm_for_neon_mov_operand"
+ (match_code "const_vector")
+{
+ return neon_immediate_valid_for_move (op, mode, NULL, NULL);
+})
+
+(define_predicate "imm_for_neon_logic_operand"
+ (match_code "const_vector")
+{
+ return neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL);
+})
+
+(define_predicate "imm_for_neon_inv_logic_operand"
+ (match_code "const_vector")
+{
+ return neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL);
+})
+
+(define_predicate "neon_logic_op2"
+ (ior (match_operand 0 "imm_for_neon_logic_operand")
+ (match_operand 0 "s_register_operand")))
+
+(define_predicate "neon_inv_logic_op2"
+ (ior (match_operand 0 "imm_for_neon_inv_logic_operand")
+ (match_operand 0 "s_register_operand")))
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm
index 172740722ca..cde00ee3407 100644
--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -9,8 +9,10 @@ MD_INCLUDES= $(srcdir)/config/arm/arm-tune.md \
$(srcdir)/config/arm/arm926ejs.md \
$(srcdir)/config/arm/cirrus.md \
$(srcdir)/config/arm/fpa.md \
+ $(srcdir)/config/arm/vec-common.md \
$(srcdir)/config/arm/iwmmxt.md \
$(srcdir)/config/arm/vfp.md \
+ $(srcdir)/config/arm/neon.md \
$(srcdir)/config/arm/thumb2.md
s-config s-conditions s-flags s-codes s-constants s-emit s-recog s-preds \
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
new file mode 100644
index 00000000000..53ed3aee41b
--- /dev/null
+++ b/gcc/config/arm/vec-common.md
@@ -0,0 +1,101 @@
+;; Machine Description for shared bits common to IWMMXT and Neon.
+;; Copyright (C) 2006 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING. If not, write to the Free
+;; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+;; 02110-1301, USA.
+
+;; Vector Moves
+
+;; All integer and float modes supported by Neon and IWMMXT.
+(define_mode_macro VALL [V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF])
+
+;; All integer modes supported by Neon and IWMMXT
+(define_mode_macro VINT [V2SI V4HI V8QI V4SI V8HI V16QI])
+
+(define_expand "mov<mode>"
+ [(set (match_operand:VALL 0 "nonimmediate_operand" "")
+ (match_operand:VALL 1 "general_operand" ""))]
+ "TARGET_NEON
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+;; Vector arithmetic.  The expanders are blank; unnamed insns then implement
+;; the patterns separately for IWMMXT and Neon.
+
+(define_expand "add<mode>3"
+ [(set (match_operand:VALL 0 "s_register_operand" "")
+ (plus:VALL (match_operand:VALL 1 "s_register_operand" "")
+ (match_operand:VALL 2 "s_register_operand" "")))]
+ "TARGET_NEON
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
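+
+;; For example, this expander for V8QImode just generates (plus:V8QI ...);
+;; recognition then matches "*addv8qi3_neon" ("vadd.i8") under TARGET_NEON,
+;; or "*addv8qi3_iwmmxt" ("waddb") under TARGET_REALLY_IWMMXT.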
+
+(define_expand "sub<mode>3"
+ [(set (match_operand:VALL 0 "s_register_operand" "")
+ (minus:VALL (match_operand:VALL 1 "s_register_operand" "")
+ (match_operand:VALL 2 "s_register_operand" "")))]
+ "TARGET_NEON
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:VALL 0 "s_register_operand" "")
+ (mult:VALL (match_operand:VALL 1 "s_register_operand" "")
+ (match_operand:VALL 2 "s_register_operand" "")))]
+ "TARGET_NEON || (<MODE>mode == V4HImode && TARGET_REALLY_IWMMXT)"
+{
+})
+
+(define_expand "smin<mode>3"
+ [(set (match_operand:VINT 0 "s_register_operand" "")
+ (smin:VINT (match_operand:VINT 1 "s_register_operand" "")
+ (match_operand:VINT 2 "s_register_operand" "")))]
+ "TARGET_NEON
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "umin<mode>3"
+ [(set (match_operand:VINT 0 "s_register_operand" "")
+ (umin:VINT (match_operand:VINT 1 "s_register_operand" "")
+ (match_operand:VINT 2 "s_register_operand" "")))]
+ "TARGET_NEON
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "smax<mode>3"
+ [(set (match_operand:VINT 0 "s_register_operand" "")
+ (smax:VINT (match_operand:VINT 1 "s_register_operand" "")
+ (match_operand:VINT 2 "s_register_operand" "")))]
+ "TARGET_NEON
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
+
+(define_expand "umax<mode>3"
+ [(set (match_operand:VINT 0 "s_register_operand" "")
+ (umax:VINT (match_operand:VINT 1 "s_register_operand" "")
+ (match_operand:VINT 2 "s_register_operand" "")))]
+ "TARGET_NEON
+ || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+{
+})
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 3f0d9dc13e1..7b317424e49 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -21,7 +21,7 @@
;; Additional register numbers
(define_constants
- [(VFPCC_REGNUM 95)]
+ [(VFPCC_REGNUM 127)]
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -121,8 +121,8 @@
;; ??? For now do not allow loading constants into vfp regs. This causes
;; problems because small constants get converted into adds.
(define_insn "*arm_movsi_vfp"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r ,m,*w,r,*w,*w, *Uv")
- (match_operand:SI 1 "general_operand" "rI,K,N,mi,r,r,*w,*w,*Uvi,*w"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r ,m,*t,r,*t,*t, *Uv")
+ (match_operand:SI 1 "general_operand" "rI,K,N,mi,r,r,*t,*t,*Uvi,*t"))]
"TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT
&& ( s_register_operand (operands[0], SImode)
|| s_register_operand (operands[1], SImode))"
@@ -158,8 +158,8 @@
)
(define_insn "*thumb2_movsi_vfp"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,m,*w,r,*w,*w, *Uv")
- (match_operand:SI 1 "general_operand" "rI,K,N,mi,r,r,*w,*w,*Uvi,*w"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,m,*t,r,*t,*t, *Uv")
+ (match_operand:SI 1 "general_operand" "rI,K,N,mi,r,r,*t,*t,*Uvi,*t"))]
"TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT
&& ( s_register_operand (operands[0], SImode)
|| s_register_operand (operands[1], SImode))"
@@ -262,8 +262,8 @@
;; preferable to loading the value via integer registers.
(define_insn "*movsf_vfp"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,?r,w ,Uv,r ,m,w,r")
- (match_operand:SF 1 "general_operand" " ?r,w,UvE,w, mE,r,w,r"))]
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t ,Uv,r ,m,t,r")
+ (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))]
"TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
&& ( s_register_operand (operands[0], SFmode)
|| s_register_operand (operands[1], SFmode))"
@@ -274,29 +274,31 @@
return \"fmsr%?\\t%0, %1\";
case 1:
return \"fmrs%?\\t%0, %1\";
- case 2: case 3:
+ case 2:
+ return \"fconsts%?\\t%0, #%G1\";
+ case 3: case 4:
return output_move_vfp (operands);
- case 4:
- return \"ldr%?\\t%0, %1\\t%@ float\";
case 5:
- return \"str%?\\t%1, %0\\t%@ float\";
+ return \"ldr%?\\t%0, %1\\t%@ float\";
case 6:
- return \"fcpys%?\\t%0, %1\";
+ return \"str%?\\t%1, %0\\t%@ float\";
case 7:
+ return \"fcpys%?\\t%0, %1\";
+ case 8:
return \"mov%?\\t%0, %1\\t%@ float\";
default:
gcc_unreachable ();
}
"
[(set_attr "predicable" "yes")
- (set_attr "type" "r_2_f,f_2_r,ffarith,*,f_loads,f_stores,load1,store1")
- (set_attr "pool_range" "*,*,1020,*,4096,*,*,*")
- (set_attr "neg_pool_range" "*,*,1008,*,4080,*,*,*")]
+ (set_attr "type" "r_2_f,f_2_r,farith,ffarith,*,f_loads,f_stores,load1,store1")
+ (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
)
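+;; Illustration of the new 'Dv' alternative (immediate encoding assumed from
+;; the VFPv3 spec, not verified here): a constant such as 1.0f that
+;; vfp3_const_double_rtx accepts can be materialised directly, e.g.
+;; "fconsts s0, #112", instead of being loaded from the constant pool.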
(define_insn "*thumb2_movsf_vfp"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,?r,w ,Uv,r ,m,w,r")
- (match_operand:SF 1 "general_operand" " ?r,w,UvE,w, mE,r,w,r"))]
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t ,Uv,r ,m,t,r")
+ (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))]
"TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
&& ( s_register_operand (operands[0], SFmode)
|| s_register_operand (operands[1], SFmode))"
@@ -307,32 +309,34 @@
return \"fmsr%?\\t%0, %1\";
case 1:
return \"fmrs%?\\t%0, %1\";
- case 2: case 3:
+ case 2:
+ return \"fconsts%?\\t%0, #%G1\";
+ case 3: case 4:
return output_move_vfp (operands);
- case 4:
- return \"ldr%?\\t%0, %1\\t%@ float\";
case 5:
- return \"str%?\\t%1, %0\\t%@ float\";
+ return \"ldr%?\\t%0, %1\\t%@ float\";
case 6:
- return \"fcpys%?\\t%0, %1\";
+ return \"str%?\\t%1, %0\\t%@ float\";
case 7:
+ return \"fcpys%?\\t%0, %1\";
+ case 8:
return \"mov%?\\t%0, %1\\t%@ float\";
default:
gcc_unreachable ();
}
"
[(set_attr "predicable" "yes")
- (set_attr "type" "r_2_f,f_2_r,ffarith,*,f_load,f_store,load1,store1")
- (set_attr "pool_range" "*,*,1020,*,4092,*,*,*")
- (set_attr "neg_pool_range" "*,*,1008,*,0,*,*,*")]
+ (set_attr "type" "r_2_f,f_2_r,farith,ffarith,*,f_load,f_store,load1,store1")
+ (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
)
;; DFmode moves
(define_insn "*movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,r, m,w ,Uv,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,mF,r,UvF,w, w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dv,mF,r,UvF,w, w,r"))]
"TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
&& ( register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
@@ -344,28 +348,30 @@
return \"fmdrr%?\\t%P0, %Q1, %R1\";
case 1:
return \"fmrrd%?\\t%Q0, %R0, %P1\";
- case 2: case 3:
+ case 2:
+ return \"fconstd%?\\t%P0, #%G1\";
+ case 3: case 4:
return output_move_double (operands);
- case 4: case 5:
+ case 5: case 6:
return output_move_vfp (operands);
- case 6:
- return \"fcpyd%?\\t%P0, %P1\";
case 7:
+ return \"fcpyd%?\\t%P0, %P1\";
+ case 8:
return \"#\";
default:
gcc_unreachable ();
}
}
"
- [(set_attr "type" "r_2_f,f_2_r,ffarith,*,load2,store2,f_loadd,f_stored")
- (set_attr "length" "4,4,8,8,4,4,4,8")
- (set_attr "pool_range" "*,*,1020,*,1020,*,*,*")
- (set_attr "neg_pool_range" "*,*,1008,*,1008,*,*,*")]
+ [(set_attr "type" "r_2_f,f_2_r,farith,ffarith,*,load2,store2,f_loadd,f_stored")
+ (set_attr "length" "4,4,4,8,8,4,4,4,8")
+ (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")]
)
(define_insn "*thumb2_movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,r, m,w ,Uv,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,mF,r,UvF,w, w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w ,Uv,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dv,mF,r,UvF,w, w,r"))]
"TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
"*
{
@@ -375,33 +381,35 @@
return \"fmdrr%?\\t%P0, %Q1, %R1\";
case 1:
return \"fmrrd%?\\t%Q0, %R0, %P1\";
- case 2: case 3: case 7:
+ case 2:
+ return \"fconstd%?\\t%P0, #%G1\";
+ case 3: case 4: case 8:
return output_move_double (operands);
- case 4: case 5:
+ case 5: case 6:
return output_move_vfp (operands);
- case 6:
+ case 7:
return \"fcpyd%?\\t%P0, %P1\";
default:
abort ();
}
}
"
- [(set_attr "type" "r_2_f,f_2_r,ffarith,*,load2,store2,f_load,f_store")
- (set_attr "length" "4,4,8,8,4,4,4,8")
- (set_attr "pool_range" "*,*,4096,*,1020,*,*,*")
- (set_attr "neg_pool_range" "*,*,0,*,1008,*,*,*")]
+ [(set_attr "type" "r_2_f,f_2_r,farith,ffarith,*,load2,store2,f_load,f_store")
+ (set_attr "length" "4,4,4,8,8,4,4,4,8")
+ (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")]
)
;; Conditional move patterns
(define_insn "*movsfcc_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r")
+ [(set (match_operand:SF 0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r")
(if_then_else:SF
(match_operator 3 "arm_comparison_operator"
[(match_operand 4 "cc_register" "") (const_int 0)])
- (match_operand:SF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
- (match_operand:SF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
+ (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
+ (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
"TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP"
"@
fcpys%D3\\t%0, %2
@@ -419,12 +427,12 @@
)
(define_insn "*thumb2_movsfcc_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w,w,w,w,w,w,?r,?r,?r")
+ [(set (match_operand:SF 0 "s_register_operand" "=t,t,t,t,t,t,?r,?r,?r")
(if_then_else:SF
(match_operator 3 "arm_comparison_operator"
[(match_operand 4 "cc_register" "") (const_int 0)])
- (match_operand:SF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
- (match_operand:SF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
+ (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
+ (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
"TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
"@
it\\t%D3\;fcpys%D3\\t%0, %2
@@ -491,8 +499,8 @@
;; Sign manipulation functions
(define_insn "*abssf2_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
- (abs:SF (match_operand:SF 1 "s_register_operand" "w")))]
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (abs:SF (match_operand:SF 1 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fabss%?\\t%0, %1"
[(set_attr "predicable" "yes")
@@ -509,8 +517,8 @@
)
(define_insn "*negsf2_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w,?r")
- (neg:SF (match_operand:SF 1 "s_register_operand" "w,r")))]
+ [(set (match_operand:SF 0 "s_register_operand" "=t,?r")
+ (neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"@
fnegs%?\\t%0, %1
@@ -569,9 +577,9 @@
;; Arithmetic insns
(define_insn "*addsf3_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
- (plus:SF (match_operand:SF 1 "s_register_operand" "w")
- (match_operand:SF 2 "s_register_operand" "w")))]
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (plus:SF (match_operand:SF 1 "s_register_operand" "t")
+ (match_operand:SF 2 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fadds%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
@@ -590,9 +598,9 @@
(define_insn "*subsf3_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
- (minus:SF (match_operand:SF 1 "s_register_operand" "w")
- (match_operand:SF 2 "s_register_operand" "w")))]
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (minus:SF (match_operand:SF 1 "s_register_operand" "t")
+ (match_operand:SF 2 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fsubs%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
@@ -613,9 +621,9 @@
;; Division insns
(define_insn "*divsf3_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "+w")
- (div:SF (match_operand:SF 1 "s_register_operand" "w")
- (match_operand:SF 2 "s_register_operand" "w")))]
+ [(set (match_operand:SF 0 "s_register_operand" "+t")
+ (div:SF (match_operand:SF 1 "s_register_operand" "t")
+ (match_operand:SF 2 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fdivs%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
@@ -636,9 +644,9 @@
;; Multiplication insns
(define_insn "*mulsf3_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "+w")
- (mult:SF (match_operand:SF 1 "s_register_operand" "w")
- (match_operand:SF 2 "s_register_operand" "w")))]
+ [(set (match_operand:SF 0 "s_register_operand" "+t")
+ (mult:SF (match_operand:SF 1 "s_register_operand" "t")
+ (match_operand:SF 2 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fmuls%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
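
Editorial sketch for the plain multiply (illustrative only):

    /* Expected to emit fmuls via *mulsf3_vfp.  */
    float mul_sf (float a, float b)
    {
      return a * b;
    }
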
@@ -657,9 +665,9 @@
(define_insn "*mulsf3negsf_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "+w")
- (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "w"))
- (match_operand:SF 2 "s_register_operand" "w")))]
+ [(set (match_operand:SF 0 "s_register_operand" "+t")
+ (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t"))
+ (match_operand:SF 2 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fnmuls%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
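
Editorial note: the fnmuls pattern lets combine fold a negation into the
multiply rather than emitting a separate fnegs. A hedged example (name
invented):

    /* -a * b is expected to match *mulsf3negsf_vfp (fnmuls).  */
    float nmul_sf (float a, float b)
    {
      return -a * b;
    }
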
@@ -681,9 +689,9 @@
;; 0 = 1 * 2 + 0
(define_insn "*mulsf3addsf_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
- (plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "w")
- (match_operand:SF 3 "s_register_operand" "w"))
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t")
+ (match_operand:SF 3 "s_register_operand" "t"))
(match_operand:SF 1 "s_register_operand" "0")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fmacs%?\\t%0, %2, %3"
@@ -704,9 +712,9 @@
;; 0 = 1 * 2 - 0
(define_insn "*mulsf3subsf_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
- (minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "w")
- (match_operand:SF 3 "s_register_operand" "w"))
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t")
+ (match_operand:SF 3 "s_register_operand" "t"))
(match_operand:SF 1 "s_register_operand" "0")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fmscs%?\\t%0, %2, %3"
@@ -727,10 +735,10 @@
;; 0 = -(1 * 2) + 0
(define_insn "*mulsf3negsfaddsf_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
(minus:SF (match_operand:SF 1 "s_register_operand" "0")
- (mult:SF (match_operand:SF 2 "s_register_operand" "w")
- (match_operand:SF 3 "s_register_operand" "w"))))]
+ (mult:SF (match_operand:SF 2 "s_register_operand" "t")
+ (match_operand:SF 3 "s_register_operand" "t"))))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fnmacs%?\\t%0, %2, %3"
[(set_attr "predicable" "yes")
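
Editorial note: fnmacs is the accumulate-minus-product form,
Sd = Sd - Sn * Sm. Sketch (illustrative):

    /* Expected to match *mulsf3negsfaddsf_vfp (fnmacs).  */
    float nmac_sf (float a, float b, float c)
    {
      return c - a * b;
    }
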
@@ -751,10 +759,10 @@
;; 0 = -(1 * 2) - 0
(define_insn "*mulsf3negsfsubsf_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
(minus:SF (mult:SF
- (neg:SF (match_operand:SF 2 "s_register_operand" "w"))
- (match_operand:SF 3 "s_register_operand" "w"))
+ (neg:SF (match_operand:SF 2 "s_register_operand" "t"))
+ (match_operand:SF 3 "s_register_operand" "t"))
(match_operand:SF 1 "s_register_operand" "0")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fnmscs%?\\t%0, %2, %3"
@@ -779,7 +787,7 @@
(define_insn "*extendsfdf2_vfp"
[(set (match_operand:DF 0 "s_register_operand" "=w")
- (float_extend:DF (match_operand:SF 1 "s_register_operand" "w")))]
+ (float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fcvtds%?\\t%P0, %1"
[(set_attr "predicable" "yes")
@@ -787,7 +795,7 @@
)
(define_insn "*truncdfsf2_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
(float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fcvtsd%?\\t%0, %P1"
@@ -796,8 +804,8 @@
)
(define_insn "*truncsisf2_vfp"
- [(set (match_operand:SI 0 "s_register_operand" "=w")
- (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "w"))))]
+ [(set (match_operand:SI 0 "s_register_operand" "=t")
+ (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"ftosizs%?\\t%0, %1"
[(set_attr "predicable" "yes")
@@ -805,7 +813,7 @@
)
(define_insn "*truncsidf2_vfp"
- [(set (match_operand:SI 0 "s_register_operand" "=w")
+ [(set (match_operand:SI 0 "s_register_operand" "=t")
(fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"ftosizd%?\\t%0, %P1"
@@ -815,8 +823,8 @@
(define_insn "fixuns_truncsfsi2"
- [(set (match_operand:SI 0 "s_register_operand" "=w")
- (unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "w"))))]
+ [(set (match_operand:SI 0 "s_register_operand" "=t")
+ (unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"ftouizs%?\\t%0, %1"
[(set_attr "predicable" "yes")
@@ -824,8 +832,8 @@
)
(define_insn "fixuns_truncdfsi2"
- [(set (match_operand:SI 0 "s_register_operand" "=w")
- (unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))]
+ [(set (match_operand:SI 0 "s_register_operand" "=t")
+ (unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"ftouizd%?\\t%0, %P1"
[(set_attr "predicable" "yes")
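
Editorial note: all four float-to-integer patterns write their SImode result
to an s-register ("=t"); a separate fmrs move transfers it to a core
register afterwards. The "z" in ftosizs/ftouizd is round-toward-zero, which
matches C's conversion semantics. Illustrative (names invented):

    /* Expected to cover *truncsisf2_vfp, *truncsidf2_vfp,
       fixuns_truncsfsi2 and fixuns_truncdfsi2.  */
    int          f2i (float a)  { return (int) a; }
    int          d2i (double a) { return (int) a; }
    unsigned int f2u (float a)  { return (unsigned) a; }
    unsigned int d2u (double a) { return (unsigned) a; }
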
@@ -834,8 +842,8 @@
(define_insn "*floatsisf2_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
- (float:SF (match_operand:SI 1 "s_register_operand" "w")))]
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (float:SF (match_operand:SI 1 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fsitos%?\\t%0, %1"
[(set_attr "predicable" "yes")
@@ -844,7 +852,7 @@
(define_insn "*floatsidf2_vfp"
[(set (match_operand:DF 0 "s_register_operand" "=w")
- (float:DF (match_operand:SI 1 "s_register_operand" "w")))]
+ (float:DF (match_operand:SI 1 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fsitod%?\\t%P0, %1"
[(set_attr "predicable" "yes")
@@ -853,8 +861,8 @@
(define_insn "floatunssisf2"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
- (unsigned_float:SF (match_operand:SI 1 "s_register_operand" "w")))]
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fuitos%?\\t%0, %1"
[(set_attr "predicable" "yes")
@@ -863,7 +871,7 @@
(define_insn "floatunssidf2"
[(set (match_operand:DF 0 "s_register_operand" "=w")
- (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "w")))]
+ (unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fuitod%?\\t%P0, %1"
[(set_attr "predicable" "yes")
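
Editorial note: the reverse direction works the same way; the SImode source
is expected in an s-register ("t") before fsitos/fuitod can consume it.
Illustrative (names invented):

    /* Expected to cover *floatsisf2_vfp, *floatsidf2_vfp,
       floatunssisf2 and floatunssidf2.  */
    float  i2f (int a)      { return (float) a; }
    double i2d (int a)      { return (double) a; }
    float  u2f (unsigned a) { return (float) a; }
    double u2d (unsigned a) { return (double) a; }
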
@@ -874,8 +882,8 @@
;; Sqrt insns.
(define_insn "*sqrtsf2_vfp"
- [(set (match_operand:SF 0 "s_register_operand" "=w")
- (sqrt:SF (match_operand:SF 1 "s_register_operand" "w")))]
+ [(set (match_operand:SF 0 "s_register_operand" "=t")
+ (sqrt:SF (match_operand:SF 1 "s_register_operand" "t")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fsqrts%?\\t%0, %1"
[(set_attr "predicable" "yes")
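
Editorial note: sqrtf only expands to this pattern when the compiler may
ignore errno. Hedged sketch (name invented):

    #include <math.h>

    /* With -fno-math-errno (or -ffast-math), sqrtf is expected to
       expand inline to *sqrtsf2_vfp (fsqrts) rather than a libm call.  */
    float sqrt_sf (float a)
    {
      return sqrtf (a);
    }
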
@@ -905,8 +913,8 @@
(define_insn_and_split "*cmpsf_split_vfp"
[(set (reg:CCFP CC_REGNUM)
- (compare:CCFP (match_operand:SF 0 "s_register_operand" "w")
- (match_operand:SF 1 "vfp_compare_operand" "wG")))]
+ (compare:CCFP (match_operand:SF 0 "s_register_operand" "t")
+ (match_operand:SF 1 "vfp_compare_operand" "tG")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"#"
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
@@ -920,8 +928,8 @@
(define_insn_and_split "*cmpsf_trap_split_vfp"
[(set (reg:CCFPE CC_REGNUM)
- (compare:CCFPE (match_operand:SF 0 "s_register_operand" "w")
- (match_operand:SF 1 "vfp_compare_operand" "wG")))]
+ (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t")
+ (match_operand:SF 1 "vfp_compare_operand" "tG")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"#"
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
@@ -968,8 +976,8 @@
(define_insn "*cmpsf_vfp"
[(set (reg:CCFP VFPCC_REGNUM)
- (compare:CCFP (match_operand:SF 0 "s_register_operand" "w,w")
- (match_operand:SF 1 "vfp_compare_operand" "w,G")))]
+ (compare:CCFP (match_operand:SF 0 "s_register_operand" "t,t")
+ (match_operand:SF 1 "vfp_compare_operand" "t,G")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"@
fcmps%?\\t%0, %1
@@ -980,8 +988,8 @@
(define_insn "*cmpsf_trap_vfp"
[(set (reg:CCFPE VFPCC_REGNUM)
- (compare:CCFPE (match_operand:SF 0 "s_register_operand" "w,w")
- (match_operand:SF 1 "vfp_compare_operand" "w,G")))]
+ (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t,t")
+ (match_operand:SF 1 "vfp_compare_operand" "t,G")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"@
fcmpes%?\\t%0, %1
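
Editorial note: the compare patterns come in quiet (fcmps, CCFP) and
signaling (fcmpes, CCFPE) flavours; the "G" alternative compares against the
constant 0.0 (fcmpzs/fcmpezs), and a following fmstat copies the FPSCR flags
to the core flags. A hedged illustration (names invented; exact selection
may vary by comparison code):

    /* Equality is a quiet compare; '<' must signal on unordered
       operands, so it is expected to use the fcmpes form.  */
    int eq_sf (float a, float b) { return a == b; }
    int lt_sf (float a, float b) { return a < b; }
    int nz_sf (float a)          { return a != 0.0f; }
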