Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/arm/arm.md            |   10
-rw-r--r--  gcc/config/arm/vfp.md            |   52
-rw-r--r--  gcc/config/i386/i386-protos.h    |    1
-rw-r--r--  gcc/config/i386/i386.c           |   20
-rw-r--r--  gcc/config/i386/i386.md          |  494
-rw-r--r--  gcc/config/i386/predicates.md    |   18
-rw-r--r--  gcc/config/i386/sse.md           |  354
-rw-r--r--  gcc/config/mips/constraints.md   |   55
-rw-r--r--  gcc/config/mips/i6400.md         |  177
-rw-r--r--  gcc/config/mips/mips-cpus.def    |    8
-rw-r--r--  gcc/config/mips/mips-ftypes.def  |  157
-rw-r--r--  gcc/config/mips/mips-modes.def   |   14
-rw-r--r--  gcc/config/mips/mips-msa.md      | 2736
-rw-r--r--  gcc/config/mips/mips-protos.h    |   20
-rw-r--r--  gcc/config/mips/mips.c           | 2227
-rw-r--r--  gcc/config/mips/mips.h           |   86
-rw-r--r--  gcc/config/mips/mips.md          |   45
-rw-r--r--  gcc/config/mips/mips.opt         |    4
-rw-r--r--  gcc/config/mips/msa.h            |  582
-rw-r--r--  gcc/config/mips/mti-elf.h        |    4
-rw-r--r--  gcc/config/mips/mti-linux.h      |    6
-rw-r--r--  gcc/config/mips/p5600.md         |  119
-rw-r--r--  gcc/config/mips/predicates.md    |  129
-rw-r--r--  gcc/config/rl78/rl78.c           |   88
-rw-r--r--  gcc/config/rl78/rl78.md          |   10
-rw-r--r--  gcc/config/rl78/rl78.opt         |    4
-rw-r--r--  gcc/config/rs6000/freebsd64.h    |    5
-rw-r--r--  gcc/config/rs6000/linux64.h      |    5
-rw-r--r--  gcc/config/rs6000/predicates.md  |    7
-rw-r--r--  gcc/config/rs6000/rs6000.c       |  217
-rw-r--r--  gcc/config/rs6000/rs6000.h       |   13
-rw-r--r--  gcc/config/rs6000/rs6000.md      |    6
-rw-r--r--  gcc/config/rs6000/sysv4.h        |   36
-rw-r--r--  gcc/config/sh/predicates.md      |   18
-rw-r--r--  gcc/config/sh/sh-protos.h        |   21
-rw-r--r--  gcc/config/sh/sh.c               |  625
-rw-r--r--  gcc/config/sh/sh.h               |   76
-rw-r--r--  gcc/config/sh/sh.md              |  423
-rw-r--r--  gcc/config/sh/sh.opt             |   16
-rw-r--r--  gcc/config/sol2.h                |   13
40 files changed, 7672 insertions, 1229 deletions
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 47171b99682..7cf87efd70a 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -121,7 +121,7 @@
; arm_arch6. "v6t2" for Thumb-2 with arm_arch6. This attribute is
; used to compute attribute "enabled", use type "any" to enable an
; alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3"
+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon"
(const_string "any"))
(define_attr "arch_enabled" "no,yes"
@@ -177,6 +177,10 @@
(and (eq_attr "arch" "armv6_or_vfpv3")
(match_test "arm_arch6 || TARGET_VFP3"))
(const_string "yes")
+
+ (and (eq_attr "arch" "neon")
+ (match_test "TARGET_NEON"))
+ (const_string "yes")
]
(const_string "no")))
@@ -8152,8 +8156,8 @@
)
(define_insn "probe_stack"
- [(set (match_operand 0 "memory_operand" "=m")
- (unspec [(const_int 0)] UNSPEC_PROBE_STACK))]
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(const_int 0)] UNSPEC_PROBE_STACK))]
"TARGET_32BIT"
"str%?\\tr0, %0"
[(set_attr "type" "store1")
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index ac5f3b862b5..6edea802b3b 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -394,8 +394,8 @@
;; DFmode moves
(define_insn "*movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r, m,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w ,mF,r,w,r"))]
"TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
&& ( register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
@@ -410,39 +410,43 @@
case 2:
gcc_assert (TARGET_VFP_DOUBLE);
return \"vmov%?.f64\\t%P0, %1\";
- case 3: case 4:
+ case 3:
+ gcc_assert (TARGET_VFP_DOUBLE);
+ return \"vmov.i64\\t%P0, #0\\t%@ float\";
+ case 4: case 5:
return output_move_vfp (operands);
- case 5: case 6:
+ case 6: case 7:
return output_move_double (operands, true, NULL);
- case 7:
+ case 8:
if (TARGET_VFP_SINGLE)
return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
else
return \"vmov%?.f64\\t%P0, %P1\";
- case 8:
+ case 9:
return \"#\";
default:
gcc_unreachable ();
}
}
"
- [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\
+ [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,f_stored,\
load2,store2,ffarithd,multiple")
- (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
- (eq_attr "alternative" "7")
+ (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+ (eq_attr "alternative" "8")
(if_then_else
(match_test "TARGET_VFP_SINGLE")
(const_int 8)
(const_int 4))]
(const_int 4)))
- (set_attr "predicable" "yes")
- (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
- (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")]
+ (set_attr "predicable" "yes,yes,yes,no,yes,yes,yes,yes,yes,yes")
+ (set_attr "pool_range" "*,*,*,*,1020,*,1020,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,*,1004,*,1004,*,*,*")
+ (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
)
(define_insn "*thumb2_movdf_vfp"
- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r")
- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))]
+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r ,m,w,r")
+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w, mF,r, w,r"))]
"TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
&& ( register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
@@ -457,11 +461,14 @@
case 2:
gcc_assert (TARGET_VFP_DOUBLE);
return \"vmov%?.f64\\t%P0, %1\";
- case 3: case 4:
+ case 3:
+ gcc_assert (TARGET_VFP_DOUBLE);
+ return \"vmov.i64\\t%P0, #0\\t%@ float\";
+ case 4: case 5:
return output_move_vfp (operands);
- case 5: case 6: case 8:
+ case 6: case 7: case 9:
return output_move_double (operands, true, NULL);
- case 7:
+ case 8:
if (TARGET_VFP_SINGLE)
return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
else
@@ -471,17 +478,18 @@
}
}
"
- [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\
+ [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,\
f_stored,load2,store2,ffarithd,multiple")
- (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8)
- (eq_attr "alternative" "7")
+ (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+ (eq_attr "alternative" "8")
(if_then_else
(match_test "TARGET_VFP_SINGLE")
(const_int 8)
(const_int 4))]
(const_int 4)))
- (set_attr "pool_range" "*,*,*,1018,*,4094,*,*,*")
- (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
+ (set_attr "pool_range" "*,*,*,*,1018,*,4094,*,*,*")
+ (set_attr "neg_pool_range" "*,*,*,*,1008,*,0,*,*,*")
+ (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
)
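
(A minimal C illustration of what the new "G" alternatives above buy,
assuming a hard-float ARM target built with -O2 -mfpu=neon; the function
below is made up for the example and is not part of the patch.)

    /* Returning +0.0 in a double register.  +0.0 is not encodable as a
       VFP fconstd immediate, so before this change it had to be loaded
       from the literal pool with vldr; with NEON available it can now
       be materialized directly as "vmov.i64 d0, #0".  */
    double
    return_zero (void)
    {
      return 0.0;
    }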
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 4145ed56658..447f67e2dff 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -52,6 +52,7 @@ extern const char *standard_80387_constant_opcode (rtx);
extern rtx standard_80387_constant_rtx (int);
extern int standard_sse_constant_p (rtx, machine_mode);
extern const char *standard_sse_constant_opcode (rtx_insn *, rtx);
+extern bool ix86_standard_x87sse_constant_load_p (const rtx_insn *, rtx);
extern bool symbolic_reference_mentioned_p (rtx);
extern bool extended_reg_mentioned_p (rtx);
extern bool x86_extended_QIreg_mentioned_p (rtx_insn *);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9680aaf3f50..05476f37449 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -11219,6 +11219,26 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx x)
gcc_unreachable ();
}
+/* Returns true if INSN can be transformed from a memory load
+ to a supported FP constant load. */
+
+bool
+ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
+{
+ rtx src = find_constant_src (insn);
+
+ gcc_assert (REG_P (dst));
+
+ if (src == NULL
+ || (SSE_REGNO_P (REGNO (dst))
+ && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
+ || (STACK_REGNO_P (REGNO (dst))
+ && standard_80387_constant_p (src) < 1))
+ return false;
+
+ return true;
+}
+
/* Returns true if OP contains a symbol reference */
bool
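
(A sketch of the intended use of the new helper, mirroring the i386.md
splitters further down in this patch; the snippet is illustrative, not
part of the patch itself.)

    /* After reload, a memory load into an FP register may be turned back
       into a direct constant load, but only for "standard" constants:
       all-zeros for an SSE destination (standard_sse_constant_p == 1),
       or any constant with a dedicated x87 load insn such as fldz/fld1
       for a stack destination (standard_80387_constant_p >= 1).  */
    if (ix86_standard_x87sse_constant_load_p (insn, operands[0]))
      operands[2] = find_constant_src (curr_insn);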
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index be7cfbfd64e..9b5407aa697 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1024,6 +1024,9 @@
(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")])
(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti")])
+;; LEA mode corresponding to an integer mode
+(define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
+
;; Half mode for double word integer modes.
(define_mode_iterator DWIH [(SI "!TARGET_64BIT")
(DI "TARGET_64BIT")])
@@ -2696,34 +2699,31 @@
(set_attr "amdfam10_decode" "double")
(set_attr "bdver1_decode" "double")])
-(define_insn "*swap<mode>_1"
- [(set (match_operand:SWI12 0 "register_operand" "+r")
- (match_operand:SWI12 1 "register_operand" "+r"))
+(define_insn "*swap<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "+<r>,r")
+ (match_operand:SWI12 1 "register_operand" "+<r>,r"))
(set (match_dup 1)
(match_dup 0))]
- "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
- "xchg{l}\t%k1, %k0"
+ ""
+ "@
+ xchg{<imodesuffix>}\t%1, %0
+ xchg{l}\t%k1, %k0"
[(set_attr "type" "imov")
- (set_attr "mode" "SI")
+ (set_attr "mode" "<MODE>,SI")
+ (set (attr "preferred_for_size")
+ (cond [(eq_attr "alternative" "0")
+ (symbol_ref "false")]
+ (symbol_ref "true")))
+ ;; Potential partial reg stall on alternative 1.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "1")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "double")
(set_attr "bdver1_decode" "double")])
-;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL
-;; is disabled for AMDFAM10
-(define_insn "*swap<mode>_2"
- [(set (match_operand:SWI12 0 "register_operand" "+<r>")
- (match_operand:SWI12 1 "register_operand" "+<r>"))
- (set (match_dup 1)
- (match_dup 0))]
- "TARGET_PARTIAL_REG_STALL"
- "xchg{<imodesuffix>}\t%1, %0"
- [(set_attr "type" "imov")
- (set_attr "mode" "<MODE>")
- (set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
-
(define_expand "movstrict<mode>"
[(set (strict_low_part (match_operand:SWI12 0 "nonimmediate_operand"))
(match_operand:SWI12 1 "general_operand"))]
@@ -3072,14 +3072,10 @@
(define_split
[(set (match_operand:SF 0 "push_operand")
(match_operand:SF 1 "memory_operand"))]
- "reload_completed"
+ "reload_completed
+ && find_constant_src (insn)"
[(set (match_dup 0) (match_dup 2))]
-{
- operands[2] = find_constant_src (curr_insn);
-
- if (operands[2] == NULL_RTX)
- FAIL;
-})
+ "operands[2] = find_constant_src (curr_insn);")
(define_split
[(set (match_operand 0 "push_operand")
@@ -3601,19 +3597,10 @@
&& (GET_MODE (operands[0]) == TFmode
|| GET_MODE (operands[0]) == XFmode
|| GET_MODE (operands[0]) == DFmode
- || GET_MODE (operands[0]) == SFmode)"
+ || GET_MODE (operands[0]) == SFmode)
+ && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
[(set (match_dup 0) (match_dup 2))]
-{
- operands[2] = find_constant_src (curr_insn);
-
- if (operands[2] == NULL_RTX
- || (SSE_REGNO_P (REGNO (operands[0]))
- && standard_sse_constant_p (operands[2],
- GET_MODE (operands[0])) != 1)
- || (STACK_REGNO_P (REGNO (operands[0]))
- && standard_80387_constant_p (operands[2]) < 1))
- FAIL;
-})
+ "operands[2] = find_constant_src (curr_insn);")
(define_split
[(set (match_operand 0 "any_fp_register_operand")
@@ -3621,19 +3608,10 @@
"reload_completed
&& (GET_MODE (operands[0]) == TFmode
|| GET_MODE (operands[0]) == XFmode
- || GET_MODE (operands[0]) == DFmode)"
+ || GET_MODE (operands[0]) == DFmode)
+ && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
[(set (match_dup 0) (match_dup 2))]
-{
- operands[2] = find_constant_src (curr_insn);
-
- if (operands[2] == NULL_RTX
- || (SSE_REGNO_P (REGNO (operands[0]))
- && standard_sse_constant_p (operands[2],
- GET_MODE (operands[0])) != 1)
- || (STACK_REGNO_P (REGNO (operands[0]))
- && standard_80387_constant_p (operands[2]) < 1))
- FAIL;
-})
+ "operands[2] = find_constant_src (curr_insn);")
;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
(define_split
@@ -3777,20 +3755,18 @@
"split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
(define_split
- [(set (match_operand:DI 0 "register_operand")
- (zero_extend:DI (match_operand:SI 1 "register_operand")))]
+ [(set (match_operand:DI 0 "general_reg_operand")
+ (zero_extend:DI (match_operand:SI 1 "general_reg_operand")))]
"!TARGET_64BIT && reload_completed
- && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))
- && true_regnum (operands[0]) == true_regnum (operands[1])"
+ && REGNO (operands[0]) == REGNO (operands[1])"
[(set (match_dup 4) (const_int 0))]
"split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
(define_split
- [(set (match_operand:DI 0 "nonimmediate_operand")
- (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
+ [(set (match_operand:DI 0 "nonimmediate_gr_operand")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_gr_operand")))]
"!TARGET_64BIT && reload_completed
- && !(MEM_P (operands[0]) && MEM_P (operands[1]))
- && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))"
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
[(set (match_dup 3) (match_dup 1))
(set (match_dup 4) (const_int 0))]
"split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
@@ -3828,7 +3804,8 @@
[(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
- if (true_regnum (operands[0]) != true_regnum (operands[1]))
+ if (!REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))
{
ix86_expand_clear (operands[0]);
@@ -3875,7 +3852,8 @@
[(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
(clobber (reg:CC FLAGS_REG))])]
{
- if (true_regnum (operands[0]) != true_regnum (operands[1]))
+ if (!REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))
{
ix86_expand_clear (operands[0]);
@@ -3988,8 +3966,8 @@
/* Generate a cltd if possible and doing so it profitable. */
if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
- && true_regnum (operands[1]) == AX_REG
- && true_regnum (operands[2]) == DX_REG)
+ && REGNO (operands[1]) == AX_REG
+ && REGNO (operands[2]) == DX_REG)
{
emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31)));
}
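
(The cltd special case on a small example, assuming -m32 -O2 with the
input in %eax and the high half wanted in %edx; illustrative only.)

    /* When the operands land in %eax/%edx, sign extension is the single
       one-byte "cltd" instead of a register move followed by
       "sarl $31".  */
    long long
    widen_signed (int x)
    {
      return x;
    }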
@@ -4030,8 +4008,8 @@
(set (match_operand:SI 3 "memory_operand") (match_dup 2))]
"/* cltd is shorter than sarl $31, %eax */
!optimize_function_for_size_p (cfun)
- && true_regnum (operands[1]) == AX_REG
- && true_regnum (operands[2]) == DX_REG
+ && REGNO (operands[1]) == AX_REG
+ && REGNO (operands[2]) == DX_REG
&& peep2_reg_dead_p (2, operands[1])
&& peep2_reg_dead_p (3, operands[2])
&& !reg_mentioned_p (operands[2], operands[3])"
@@ -4052,19 +4030,19 @@
{
split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
- if (true_regnum (operands[3]) != true_regnum (operands[1]))
+ if (REGNO (operands[3]) != REGNO (operands[1]))
emit_move_insn (operands[3], operands[1]);
/* Generate a cltd if possible and doing so it profitable. */
if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
- && true_regnum (operands[3]) == AX_REG
- && true_regnum (operands[4]) == DX_REG)
+ && REGNO (operands[3]) == AX_REG
+ && REGNO (operands[4]) == DX_REG)
{
emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
DONE;
}
- if (true_regnum (operands[4]) != true_regnum (operands[1]))
+ if (REGNO (operands[4]) != REGNO (operands[1]))
emit_move_insn (operands[4], operands[1]);
emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31)));
@@ -4203,15 +4181,15 @@
"operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
(define_expand "extendsfdf2"
- [(set (match_operand:DF 0 "nonimmediate_operand")
+ [(set (match_operand:DF 0 "nonimm_ssenomem_operand")
(float_extend:DF (match_operand:SF 1 "general_operand")))]
- "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
{
/* ??? Needed for compress_float_constant since all fp constants
are TARGET_LEGITIMATE_CONSTANT_P. */
if (CONST_DOUBLE_P (operands[1]))
{
- if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
+ if ((!SSE_FLOAT_MODE_P (DFmode) || TARGET_MIX_SSE_I387)
&& standard_80387_constant_p (operands[1]) > 0)
{
operands[1] = simplify_const_unary_operation
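
(The two paths of the extendsfdf2 expander on a trivial example; which
instruction is chosen depends on -mfpmath, so this is a sketch, not a
guaranteed output.)

    /* With SSE math (the x86-64 default) the extension is a single
       cvtss2sd; with x87 math it is just an fld, since the x87 stack
       holds values at register precision anyway.  */
    double
    extend (float x)
    {
      return (double) x;
    }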
@@ -4231,12 +4209,12 @@
that might lead to ICE on 32bit target. The sequence unlikely combine
anyway. */
(define_split
- [(set (match_operand:DF 0 "register_operand")
+ [(set (match_operand:DF 0 "sse_reg_operand")
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand")))]
"TARGET_USE_VECTOR_FP_CONVERTS
&& optimize_insn_for_speed_p ()
- && reload_completed && SSE_REG_P (operands[0])
+ && reload_completed
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 2)
@@ -4253,13 +4231,11 @@
{
/* If it is unsafe to overwrite upper half of source, we need
to move to destination and unpack there. */
- if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
- || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
- && true_regnum (operands[0]) != true_regnum (operands[1]))
+ if (REGNO (operands[0]) != REGNO (operands[1])
|| (EXT_REX_SSE_REG_P (operands[1])
&& !TARGET_AVX512VL))
{
- rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
+ rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
emit_move_insn (tmp, operands[1]);
}
else
@@ -4267,7 +4243,7 @@
/* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
=v, v, then vbroadcastss will be only needed for AVX512F without
AVX512VL. */
- if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3])))
+ if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3])))
emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
operands[3]));
else
@@ -4283,21 +4259,20 @@
;; It's more profitable to split and then extend in the same register.
(define_peephole2
- [(set (match_operand:DF 0 "register_operand")
+ [(set (match_operand:DF 0 "sse_reg_operand")
(float_extend:DF
(match_operand:SF 1 "memory_operand")))]
"TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
- && optimize_insn_for_speed_p ()
- && SSE_REG_P (operands[0])"
+ && optimize_insn_for_speed_p ()"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (float_extend:DF (match_dup 2)))]
- "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));")
+ "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
-(define_insn "*extendsfdf2_mixed"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,v")
+(define_insn "*extendsfdf2"
+ [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
{
switch (which_alternative)
{
@@ -4316,18 +4291,16 @@
(set_attr "prefix" "orig,orig,maybe_vex")
(set_attr "mode" "SF,XF,DF")
(set (attr "enabled")
- (cond [(eq_attr "alternative" "0,1")
- (symbol_ref "TARGET_MIX_SSE_I387")
- ]
- (symbol_ref "true")))])
-
-(define_insn "*extendsfdf2_i387"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m")
- (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
- "TARGET_80387"
- "* return output_387_reg_move (insn, operands);"
- [(set_attr "type" "fmov")
- (set_attr "mode" "SF,XF")])
+ (if_then_else
+ (match_test ("SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"))
+ (if_then_else
+ (eq_attr "alternative" "0,1")
+ (symbol_ref "TARGET_MIX_SSE_I387")
+ (symbol_ref "true"))
+ (if_then_else
+ (eq_attr "alternative" "0,1")
+ (symbol_ref "true")
+ (symbol_ref "false"))))])
(define_expand "extend<mode>xf2"
[(set (match_operand:XF 0 "nonimmediate_operand")
@@ -4370,9 +4343,9 @@
[(set (match_operand:SF 0 "nonimmediate_operand")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand")))]
- "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
{
- if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
+ if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
;
else if (flag_unsafe_math_optimizations)
;
@@ -4392,12 +4365,12 @@
that might lead to ICE on 32bit target. The sequence unlikely combine
anyway. */
(define_split
- [(set (match_operand:SF 0 "register_operand")
+ [(set (match_operand:SF 0 "sse_reg_operand")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand")))]
"TARGET_USE_VECTOR_FP_CONVERTS
&& optimize_insn_for_speed_p ()
- && reload_completed && SSE_REG_P (operands[0])
+ && reload_completed
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 2)
@@ -4415,9 +4388,7 @@
if (REG_P (operands[1]))
{
if (!TARGET_SSE3
- && true_regnum (operands[0]) != true_regnum (operands[1])
- && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
- || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8))
+ && REGNO (operands[0]) != REGNO (operands[1]))
{
rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
emit_move_insn (tmp, operands[1]);
@@ -4434,15 +4405,14 @@
;; It's more profitable to split and then extend in the same register.
(define_peephole2
- [(set (match_operand:SF 0 "register_operand")
+ [(set (match_operand:SF 0 "sse_reg_operand")
(float_truncate:SF
(match_operand:DF 1 "memory_operand")))]
"TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
- && optimize_insn_for_speed_p ()
- && SSE_REG_P (operands[0])"
+ && optimize_insn_for_speed_p ()"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (float_truncate:SF (match_dup 2)))]
- "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
+ "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
(define_expand "truncdfsf2_with_temp"
[(parallel [(set (match_operand:SF 0)
@@ -4455,7 +4425,7 @@
[(set (match_operand:SF 0 "nonimmediate_operand" "=fm,v")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "f ,vm")))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
+ "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
{
switch (which_alternative)
{
@@ -4549,7 +4519,7 @@
"reload_completed"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]
- "operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));")
+ "operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));")
;; Conversion from XFmode to {SF,DF}mode
@@ -5155,11 +5125,11 @@
;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs
;; alternative in sse2_loadld.
(define_split
- [(set (match_operand:MODEF 0 "register_operand")
+ [(set (match_operand:MODEF 0 "sse_reg_operand")
(float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
- "TARGET_SSE2 && TARGET_SSE_MATH
- && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
- && reload_completed && SSE_REG_P (operands[0])
+ "TARGET_USE_VECTOR_CONVERTS
+ && optimize_function_for_speed_p (cfun)
+ && reload_completed
&& (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC)
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
@@ -5178,83 +5148,83 @@
DONE;
})
-;; Avoid partial SSE register dependency stalls
+;; Avoid partial SSE register dependency stalls. This splitter should split
+;; late in the pass sequence (after register rename pass), so allocated
+;; registers won't change anymore
+
(define_split
- [(set (match_operand:MODEF 0 "register_operand")
+ [(set (match_operand:MODEF 0 "sse_reg_operand")
(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
- "TARGET_SSE2 && TARGET_SSE_MATH
- && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun)
- && reload_completed && SSE_REG_P (operands[0])
+ && epilogue_completed
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
- [(const_int 0)]
+ [(set (match_dup 0)
+ (vec_merge:<MODEF:ssevecmode>
+ (vec_duplicate:<MODEF:ssevecmode>
+ (float:MODEF
+ (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
{
const machine_mode vmode = <MODEF:ssevecmode>mode;
- const machine_mode mode = <MODEF:MODE>mode;
- rtx t, op0 = lowpart_subreg (vmode, operands[0], mode);
-
- emit_move_insn (op0, CONST0_RTX (vmode));
- t = gen_rtx_FLOAT (mode, operands[1]);
- t = gen_rtx_VEC_DUPLICATE (vmode, t);
- t = gen_rtx_VEC_MERGE (vmode, t, op0, const1_rtx);
- emit_insn (gen_rtx_SET (op0, t));
- DONE;
+ operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
+ emit_move_insn (operands[0], CONST0_RTX (vmode));
})
-;; Break partial reg stall for cvtsd2ss.
+;; Break partial reg stall for cvtsd2ss. This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
-(define_peephole2
- [(set (match_operand:SF 0 "register_operand")
+(define_split
+ [(set (match_operand:SF 0 "sse_reg_operand")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand")))]
- "TARGET_SSE2 && TARGET_SSE_MATH
- && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun)
- && SSE_REG_P (operands[0])
- && (!SSE_REG_P (operands[1])
+ && epilogue_completed
+ && (!REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1]))
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
(vec_merge:V4SF
(vec_duplicate:V4SF
- (float_truncate:V2SF
+ (float_truncate:SF
(match_dup 1)))
(match_dup 0)
(const_int 1)))]
{
operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
- operands[1] = lowpart_subreg (V2DFmode, operands[1], DFmode);
emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
})
-;; Break partial reg stall for cvtss2sd.
+;; Break partial reg stall for cvtss2sd. This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
-(define_peephole2
- [(set (match_operand:DF 0 "register_operand")
+(define_split
+ [(set (match_operand:DF 0 "sse_reg_operand")
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand")))]
- "TARGET_SSE2 && TARGET_SSE_MATH
- && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun)
- && SSE_REG_P (operands[0])
- && (!SSE_REG_P (operands[1])
+ && epilogue_completed
+ && (!REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1]))
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
(vec_merge:V2DF
- (float_extend:V2DF
- (vec_select:V2SF
- (match_dup 1)
- (parallel [(const_int 0) (const_int 1)])))
- (match_dup 0)
+ (vec_duplicate:V2DF
+ (float_extend:DF
+ (match_dup 1)))
+ (match_dup 0)
(const_int 1)))]
{
operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
- operands[1] = lowpart_subreg (V4SFmode, operands[1], SFmode);
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
})
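
(The stall the splitters above avoid, shown on a minimal example; assuming
-O2 on a CPU with TARGET_SSE_PARTIAL_REG_DEPENDENCY set.)

    /* cvtsi2ss writes only the low element of its destination, so it
       carries a false dependency on the register's previous contents.
       The split form first zeroes the whole register (pxor/xorps) and
       then merges the converted value into element 0, which is exactly
       what the vec_merge RTL emitted above expresses.  */
    float
    int_to_float (int x)
    {
      return (float) x;
    }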
@@ -5299,7 +5269,7 @@
emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
operands[4]));
- operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+ operands[3] = gen_lowpart (DImode, operands[3]);
})
(define_split
@@ -5631,7 +5601,6 @@
(const_string "*")))
(set_attr "mode" "HI,HI,HI,SI")])
-;; %%% Potential partial reg stall on alternatives 3 and 4. What to do?
(define_insn "*addqi_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp")
(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp")
@@ -5639,7 +5608,7 @@
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (PLUS, QImode, operands)"
{
- bool widen = (which_alternative == 3 || which_alternative == 4);
+ bool widen = (get_attr_mode (insn) != MODE_QI);
switch (get_attr_type (insn))
{
@@ -5688,7 +5657,12 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "QI,QI,QI,SI,SI,SI")])
+ (set_attr "mode" "QI,QI,QI,SI,SI,SI")
+ ;; Potential partial reg stall on alternatives 3 and 4.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "3,4")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
(define_insn "*addqi_1_slp"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
@@ -5737,32 +5711,6 @@
(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])])
-;; Convert add to the lea pattern to avoid flags dependency.
-(define_split
- [(set (match_operand:SWI 0 "register_operand")
- (plus:SWI (match_operand:SWI 1 "register_operand")
- (match_operand:SWI 2 "<nonmemory_operand>")))
- (clobber (reg:CC FLAGS_REG))]
- "reload_completed && ix86_lea_for_add_ok (insn, operands)"
- [(const_int 0)]
-{
- machine_mode mode = <MODE>mode;
- rtx pat;
-
- if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
- {
- mode = SImode;
- operands[0] = gen_lowpart (mode, operands[0]);
- operands[1] = gen_lowpart (mode, operands[1]);
- operands[2] = gen_lowpart (mode, operands[2]);
- }
-
- pat = gen_rtx_PLUS (mode, operands[1], operands[2]);
-
- emit_insn (gen_rtx_SET (operands[0], pat));
- DONE;
-})
-
;; Split non destructive adds if we cannot use lea.
(define_split
[(set (match_operand:DI 0 "register_operand")
@@ -5780,6 +5728,24 @@
;; Convert add to the lea pattern to avoid flags dependency.
(define_split
+ [(set (match_operand:SWI 0 "register_operand")
+ (plus:SWI (match_operand:SWI 1 "register_operand")
+ (match_operand:SWI 2 "<nonmemory_operand>")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && ix86_lea_for_add_ok (insn, operands)"
+ [(set (match_dup 0)
+ (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
+{
+ if (<MODE>mode != <LEAMODE>mode)
+ {
+ operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+ operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+ operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
+ }
+})
+
+;; Convert add to the lea pattern to avoid flags dependency.
+(define_split
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
(plus:SI (match_operand:SI 1 "register_operand")
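
(What the rewritten add-to-lea splitter produces on a small example,
assuming -O2 and a CPU for which ix86_lea_for_add_ok returns true;
illustrative only.)

    /* A non-destructive three-operand add can be a single
       "leal (%rdi,%rsi), %eax"; unlike addl, lea does not write the
       flags register, which is the dependency the comment refers to.
       QImode/HImode operands are first widened to SImode via the new
       LEAMODE attribute.  */
    int
    add (int a, int b)
    {
      return a + b;
    }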
@@ -6264,7 +6230,7 @@
[(set (match_operand:SWI12 0 "register_operand" "=r")
(plus:SWI12
(mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
- (match_operand:SWI12 2 "const248_operand" "n"))
+ (match_operand 2 "const248_operand" "n"))
(match_operand:SWI12 3 "nonmemory_operand" "ri")))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
@@ -6286,7 +6252,7 @@
(plus:SWI12
(plus:SWI12
(mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
- (match_operand:SWI12 2 "const248_operand" "n"))
+ (match_operand 2 "const248_operand" "n"))
(match_operand:SWI12 3 "register_operand" "r"))
(match_operand:SWI12 4 "immediate_operand" "i")))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
@@ -6312,8 +6278,8 @@
(any_or:SWI12
(ashift:SWI12
(match_operand:SWI12 1 "index_register_operand" "l")
- (match_operand:SWI12 2 "const_0_to_3_operand" "n"))
- (match_operand:SWI12 3 "const_int_operand" "n")))]
+ (match_operand 2 "const_0_to_3_operand" "n"))
+ (match_operand 3 "const_int_operand" "n")))]
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& ((unsigned HOST_WIDE_INT) INTVAL (operands[3])
< (HOST_WIDE_INT_1U << INTVAL (operands[2])))"
@@ -6336,8 +6302,8 @@
(any_or:SWI48
(ashift:SWI48
(match_operand:SWI48 1 "index_register_operand" "l")
- (match_operand:SWI48 2 "const_0_to_3_operand" "n"))
- (match_operand:SWI48 3 "const_int_operand" "n")))]
+ (match_operand 2 "const_0_to_3_operand" "n"))
+ (match_operand 3 "const_int_operand" "n")))]
"(unsigned HOST_WIDE_INT) INTVAL (operands[3])
< (HOST_WIDE_INT_1U << INTVAL (operands[2]))"
"#"
@@ -7201,7 +7167,7 @@
(match_operand:DWIH 2 "nonimmediate_operand"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_BMI2 && reload_completed
- && true_regnum (operands[1]) == DX_REG"
+ && REGNO (operands[1]) == DX_REG"
[(parallel [(set (match_dup 3)
(mult:DWIH (match_dup 1) (match_dup 2)))
(set (match_dup 4)
@@ -8247,7 +8213,6 @@
(const_string "*")))
(set_attr "mode" "HI,HI,SI,HI")])
-;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*andqi_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,!k")
(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
@@ -8270,7 +8235,12 @@
}
}
[(set_attr "type" "alu,alu,alu,msklog")
- (set_attr "mode" "QI,QI,SI,HI")])
+ (set_attr "mode" "QI,QI,SI,HI")
+ ;; Potential partial reg stall on alternative 2.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "2")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
(define_insn "*andqi_1_slp"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
@@ -8346,7 +8316,8 @@
(match_operand:SWI248 2 "const_int_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
+ && (!REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))"
[(const_int 0)]
{
HOST_WIDE_INT ival = INTVAL (operands[2]);
@@ -8754,7 +8725,6 @@
[(set_attr "type" "alu,alu,msklog")
(set_attr "mode" "HI")])
-;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*<code>qi_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r,!k")
(any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
@@ -8767,7 +8737,12 @@
<logic>{l}\t{%k2, %k0|%k0, %k2}
k<logic>w\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "alu,alu,alu,msklog")
- (set_attr "mode" "QI,QI,SI,HI")])
+ (set_attr "mode" "QI,QI,SI,HI")
+ ;; Potential partial reg stall on alternative 2.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "2")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*<code>si_1_zext"
@@ -9258,8 +9233,7 @@
[(match_operand:MODEF 1 "register_operand" "0,x,0,0")]))
(use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X"))
(clobber (reg:CC FLAGS_REG))]
- "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || TARGET_80387"
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"#"
[(set (attr "enabled")
(if_then_else
@@ -9308,12 +9282,12 @@
[(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))])
(define_split
- [(set (match_operand 0 "register_operand")
+ [(set (match_operand 0 "sse_reg_operand")
(match_operator 3 "absneg_operator"
[(match_operand 1 "register_operand")]))
(use (match_operand 2 "nonimmediate_operand"))
(clobber (reg:CC FLAGS_REG))]
- "reload_completed && SSE_REG_P (operands[0])"
+ "reload_completed"
[(set (match_dup 0) (match_dup 3))]
{
machine_mode mode = GET_MODE (operands[0]);
@@ -9332,7 +9306,7 @@
})
(define_split
- [(set (match_operand:SF 0 "register_operand")
+ [(set (match_operand:SF 0 "general_reg_operand")
(match_operator:SF 1 "absneg_operator" [(match_dup 0)]))
(use (match_operand:V4SF 2))
(clobber (reg:CC FLAGS_REG))]
@@ -9356,7 +9330,7 @@
})
(define_split
- [(set (match_operand:DF 0 "register_operand")
+ [(set (match_operand:DF 0 "general_reg_operand")
(match_operator:DF 1 "absneg_operator" [(match_dup 0)]))
(use (match_operand 2))
(clobber (reg:CC FLAGS_REG))]
@@ -9394,7 +9368,7 @@
})
(define_split
- [(set (match_operand:XF 0 "register_operand")
+ [(set (match_operand:XF 0 "general_reg_operand")
(match_operator:XF 1 "absneg_operator" [(match_dup 0)]))
(use (match_operand 2))
(clobber (reg:CC FLAGS_REG))]
@@ -9404,8 +9378,7 @@
{
rtx tmp;
operands[0] = gen_rtx_REG (SImode,
- true_regnum (operands[0])
- + (TARGET_64BIT ? 1 : 2));
+ REGNO (operands[0]) + (TARGET_64BIT ? 1 : 2));
if (GET_CODE (operands[1]) == ABS)
{
tmp = GEN_INT (0x7fff);
@@ -9546,7 +9519,6 @@
(set_attr "prefix" "*,vex")
(set_attr "mode" "HI")])
-;; %%% Potential partial reg stall on alternative 1. What to do?
(define_insn "*one_cmplqi2_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!k")
(not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
@@ -9569,7 +9541,12 @@
[(set_attr "isa" "*,*,avx512f")
(set_attr "type" "negnot,negnot,msklog")
(set_attr "prefix" "*,*,vex")
- (set_attr "mode" "QI,SI,QI")])
+ (set_attr "mode" "QI,SI,QI")
+ ;; Potential partial reg stall on alternative 1.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "1")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
;; ??? Currently never generated - xor is used instead.
(define_insn "*one_cmplsi2_1_zext"
@@ -9988,7 +9965,6 @@
(const_string "*")))
(set_attr "mode" "HI,SI")])
-;; %%% Potential partial reg stall on alternative 1. What to do?
(define_insn "*ashlqi3_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp")
(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
@@ -10044,7 +10020,12 @@
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
- (set_attr "mode" "QI,SI,SI")])
+ (set_attr "mode" "QI,SI,SI")
+ ;; Potential partial reg stall on alternative 1.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "1")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
(define_insn "*ashlqi3_1_slp"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
@@ -10091,31 +10072,21 @@
;; Convert ashift to the lea pattern to avoid flags dependency.
(define_split
- [(set (match_operand 0 "register_operand")
- (ashift (match_operand 1 "index_register_operand")
- (match_operand:QI 2 "const_int_operand")))
+ [(set (match_operand:SWI 0 "register_operand")
+ (ashift:SWI (match_operand:SWI 1 "index_register_operand")
+ (match_operand 2 "const_0_to_3_operand")))
(clobber (reg:CC FLAGS_REG))]
- "GET_MODE (operands[0]) == GET_MODE (operands[1])
- && reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
- [(const_int 0)]
+ "reload_completed
+ && REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_dup 0)
+ (mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
{
- machine_mode mode = GET_MODE (operands[0]);
- rtx pat;
-
- if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
- {
- mode = SImode;
- operands[0] = gen_lowpart (mode, operands[0]);
- operands[1] = gen_lowpart (mode, operands[1]);
+ if (<MODE>mode != <LEAMODE>mode)
+ {
+ operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+ operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
}
-
- operands[2] = gen_int_mode (1 << INTVAL (operands[2]), mode);
-
- pat = gen_rtx_MULT (mode, operands[1], operands[2]);
-
- emit_insn (gen_rtx_SET (operands[0], pat));
- DONE;
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));
})
;; Convert ashift to the lea pattern to avoid flags dependency.
@@ -10123,15 +10094,15 @@
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
(ashift:SI (match_operand:SI 1 "index_register_operand")
- (match_operand:QI 2 "const_int_operand"))))
+ (match_operand 2 "const_0_to_3_operand"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
+ && REGNO (operands[0]) != REGNO (operands[1])"
[(set (match_dup 0)
(zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))]
{
operands[1] = gen_lowpart (SImode, operands[1]);
- operands[2] = gen_int_mode (1 << INTVAL (operands[2]), SImode);
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));
})
;; This pattern can't accept a variable shift count, since shifts by
@@ -11078,20 +11049,19 @@
(const_int 1))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_64BIT && !TARGET_USE_BT"
- [(const_int 0)]
+ [(parallel [(set (match_dup 0)
+ (ior:DI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
{
int i = INTVAL (operands[1]);
- rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
+ operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
- if (i >= 31)
+ if (!x86_64_immediate_operand (operands[3], DImode))
{
- emit_move_insn (operands[2], op1);
- op1 = operands[2];
+ emit_move_insn (operands[2], operands[3]);
+ operands[3] = operands[2];
}
-
- emit_insn (gen_iordi3 (operands[0], operands[0], op1));
- DONE;
})
(define_peephole2
@@ -11103,20 +11073,19 @@
(const_int 0))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_64BIT && !TARGET_USE_BT"
- [(const_int 0)]
+ [(parallel [(set (match_dup 0)
+ (and:DI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
{
int i = INTVAL (operands[1]);
- rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
+ operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode);
- if (i >= 32)
+ if (!x86_64_immediate_operand (operands[3], DImode))
{
- emit_move_insn (operands[2], op1);
- op1 = operands[2];
+ emit_move_insn (operands[2], operands[3]);
+ operands[3] = operands[2];
}
-
- emit_insn (gen_anddi3 (operands[0], operands[0], op1));
- DONE;
})
(define_peephole2
@@ -11129,20 +11098,19 @@
(match_dup 0) (const_int 1) (match_dup 1))))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_64BIT && !TARGET_USE_BT"
- [(const_int 0)]
+ [(parallel [(set (match_dup 0)
+ (xor:DI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
{
int i = INTVAL (operands[1]);
- rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
+ operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
- if (i >= 31)
+ if (!x86_64_immediate_operand (operands[3], DImode))
{
- emit_move_insn (operands[2], op1);
- op1 = operands[2];
+ emit_move_insn (operands[2], operands[3]);
+ operands[3] = operands[2];
}
-
- emit_insn (gen_xordi3 (operands[0], operands[0], op1));
- DONE;
})
(define_insn "*bt<mode>"
@@ -14063,9 +14031,9 @@
[(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v")
(match_operator:MODEF 3 "binary_fp_operator"
[(match_operand:MODEF 1
- "nonimm_ssenomem_operand" "0,fm,0,v")
+ "x87nonimm_ssenomem_operand" "0,fm,0,v")
(match_operand:MODEF 2
- "nonimmediate_operand" "fm,0,xm,vm")]))]
+ "nonimmediate_operand" "fm,0,xm,vm")]))]
"((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
|| (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
&& !COMMUTATIVE_ARITH_P (operands[3])
@@ -17239,7 +17207,7 @@
(set_attr "mode" "DF,DF,DI,DI,DI,DI")])
(define_split
- [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand")
+ [(set (match_operand:DF 0 "general_reg_operand")
(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
(match_operand:DF 2 "nonimmediate_operand")
@@ -17295,7 +17263,7 @@
;; Don't do conditional moves with memory inputs
(define_peephole2
[(match_scratch:MODEF 4 "r")
- (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand")
+ (set (match_operand:MODEF 0 "general_reg_operand")
(if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
(match_operand:MODEF 2 "nonimmediate_operand")
@@ -17845,7 +17813,7 @@
(match_operand:SI 3 "immediate_operand"))
(const_int 0)]))]
"ix86_match_ccmode (insn, CCNOmode)
- && (true_regnum (operands[2]) != AX_REG
+ && (REGNO (operands[2]) != AX_REG
|| satisfies_constraint_K (operands[3]))
&& peep2_reg_dead_p (1, operands[2])"
[(parallel
@@ -17866,7 +17834,7 @@
(const_int 0)]))]
"! TARGET_PARTIAL_REG_STALL
&& ix86_match_ccmode (insn, CCNOmode)
- && true_regnum (operands[2]) != AX_REG
+ && REGNO (operands[2]) != AX_REG
&& peep2_reg_dead_p (1, operands[2])"
[(parallel
[(set (match_dup 0)
@@ -17887,7 +17855,7 @@
(const_int 0)]))]
"! TARGET_PARTIAL_REG_STALL
&& ix86_match_ccmode (insn, CCNOmode)
- && true_regnum (operands[2]) != AX_REG
+ && REGNO (operands[2]) != AX_REG
&& peep2_reg_dead_p (1, operands[2])"
[(parallel [(set (match_dup 0)
(match_op_dup 1
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index b3a471d8b76..b3cf2a3cb04 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -27,11 +27,6 @@
(and (match_code "reg")
(match_test "STACK_REGNO_P (REGNO (op))")))
-;; Return true if OP is a non-fp register_operand.
-(define_predicate "register_and_not_any_fp_reg_operand"
- (and (match_code "reg")
- (not (match_test "ANY_FP_REGNO_P (REGNO (op))"))))
-
;; True if the operand is a GENERAL class register.
(define_predicate "general_reg_operand"
(and (match_code "reg")
@@ -43,11 +38,6 @@
(match_test "GENERAL_REGNO_P (REGNO (op))")
(match_operand 0 "nonimmediate_operand")))
-;; Return true if OP is a register operand other than an i387 fp register.
-(define_predicate "register_and_not_fp_reg_operand"
- (and (match_code "reg")
- (not (match_test "STACK_REGNO_P (REGNO (op))"))))
-
;; True if the operand is an MMX register.
(define_predicate "mmx_reg_operand"
(and (match_code "reg")
@@ -133,6 +123,14 @@
(define_predicate "nonimm_ssenomem_operand"
(if_then_else
(and (match_test "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH")
+ (not (match_test "TARGET_MIX_SSE_I387")))
+ (match_operand 0 "register_operand")
+ (match_operand 0 "nonimmediate_operand")))
+
+;; The above predicate, suitable for x87 arithmetic operators.
+(define_predicate "x87nonimm_ssenomem_operand"
+ (if_then_else
+ (and (match_test "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH")
(not (match_test "TARGET_MIX_SSE_I387 && X87_ENABLE_ARITH (mode)")))
(match_operand 0 "register_operand")
(match_operand 0 "nonimmediate_operand")))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 42d553cfdaa..411f78e0ede 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1076,10 +1076,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "sse2_movq128"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
(vec_concat:V2DI
(vec_select:DI
- (match_operand:V2DI 1 "nonimmediate_operand" "xm")
+ (match_operand:V2DI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0)]))
(const_int 0)))]
"TARGET_SSE2"
@@ -3327,10 +3327,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
- [(set (match_operand:VF_AVX512VL 0 "register_operand" "=x")
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
(vec_merge:VF_AVX512VL
(fma:VF_AVX512VL
- (match_operand:VF_AVX512VL 1 "register_operand" "x")
+ (match_operand:VF_AVX512VL 1 "register_operand" "v")
(match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
(match_operand:VF_AVX512VL 3 "register_operand" "0"))
(match_dup 3)
@@ -4735,9 +4735,9 @@
"operands[2] = CONST0_RTX (V4SImode);")
(define_insn "*avx_cvtpd2dq256_2"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(vec_concat:V8SI
- (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
+ (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
UNSPEC_FIX_NOTRUNC)
(match_operand:V4SI 2 "const0_operand")))]
"TARGET_AVX"
@@ -4949,6 +4949,27 @@
(set_attr "prefix" "orig,orig,<round_prefix>")
(set_attr "mode" "SF")])
+(define_insn "*sse2_vd_cvtsd2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
+ (match_operand:V4SF 1 "register_operand" "0,0,v")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "@
+ cvtsd2ss\t{%2, %0|%0, %2}
+ cvtsd2ss\t{%2, %0|%0, %2}
+ vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,double,*")
+ (set_attr "amdfam10_decode" "vector,double,*")
+ (set_attr "bdver1_decode" "direct,direct,*")
+ (set_attr "btver2_decode" "double,double,double")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "mode" "SF")])
+
(define_insn "sse2_cvtss2sd<round_saeonly_name>"
[(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
(vec_merge:V2DF
@@ -4972,6 +4993,27 @@
(set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
(set_attr "mode" "DF")])
+(define_insn "*sse2_vd_cvtss2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
+ (match_operand:V2DF 1 "register_operand" "0,0,v")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "@
+ cvtss2sd\t{%2, %0|%0, %2}
+ cvtss2sd\t{%2, %0|%0, %2}
+ vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssecvt")
+ (set_attr "amdfam10_decode" "vector,double,*")
+ (set_attr "athlon_decode" "direct,direct,*")
+ (set_attr "bdver1_decode" "direct,direct,*")
+ (set_attr "btver2_decode" "double,double,double")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "mode" "DF")])
+
(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
[(set (match_operand:V8SF 0 "register_operand" "=v")
(float_truncate:V8SF
@@ -5050,10 +5092,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "*avx_cvtps2pd256_2"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
+ [(set (match_operand:V4DF 0 "register_operand" "=v")
(float_extend:V4DF
(vec_select:V4SF
- (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SF 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
"TARGET_AVX"
@@ -5744,11 +5786,11 @@
})
(define_insn "sse_movhlps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
(parallel [(const_int 6)
(const_int 7)
(const_int 2)
@@ -5762,7 +5804,7 @@
%vmovhps\t{%2, %0|%q0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
(define_expand "sse_movlhps_exp"
@@ -5789,11 +5831,11 @@
})
(define_insn "sse_movlhps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
(parallel [(const_int 0)
(const_int 1)
(const_int 4)
@@ -5807,7 +5849,7 @@
%vmovlps\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
@@ -5851,8 +5893,8 @@
[(set (match_dup 3)
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SF 1 "register_operand")
+ (match_operand:V8SF 2 "nonimmediate_operand"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 4) (const_int 12)
@@ -5956,8 +5998,8 @@
[(set (match_dup 3)
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SF 1 "register_operand")
+ (match_operand:V8SF 2 "nonimmediate_operand"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 4) (const_int 12)
@@ -5987,11 +6029,11 @@
})
(define_insn "vec_interleave_lowv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ [(set (match_operand:V4SF 0 "register_operand" "=x,v")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0,x")
- (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
+ (match_operand:V4SF 1 "register_operand" "0,v")
+ (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"TARGET_SSE"
@@ -6000,7 +6042,7 @@
vunpcklps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "V4SF")])
;; These are modeled with the same vec_concat as the others so that we
@@ -6219,11 +6261,11 @@
(set_attr "mode" "V4SF")])
(define_insn "sse_shufps_<mode>"
- [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
+ [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
(vec_select:VI4F_128
(vec_concat:<ssedoublevecmode>
- (match_operand:VI4F_128 1 "register_operand" "0,x")
- (match_operand:VI4F_128 2 "vector_operand" "xBm,xm"))
+ (match_operand:VI4F_128 1 "register_operand" "0,v")
+ (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
(parallel [(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
(match_operand 5 "const_4_to_7_operand")
@@ -6250,13 +6292,13 @@
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseshuf")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "V4SF")])
(define_insn "sse_storehps"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
(vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+ (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_SSE"
"@
@@ -6288,12 +6330,12 @@
})
(define_insn "sse_loadhps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
(vec_concat:V4SF
(vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
(parallel [(const_int 0) (const_int 1)]))
- (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
+ (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
"TARGET_SSE"
"@
movhps\t{%2, %0|%0, %q2}
@@ -6303,13 +6345,13 @@
%vmovlps\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
(define_insn "sse_storelps"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
(vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
(parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"@
@@ -6341,11 +6383,11 @@
})
(define_insn "sse_loadlps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
(vec_concat:V4SF
- (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
+ (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
(vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
+ (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
(parallel [(const_int 2) (const_int 3)]))))]
"TARGET_SSE"
"@
@@ -6357,14 +6399,14 @@
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
(set_attr "length_immediate" "1,1,*,*,*")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
(define_insn "sse_movss"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ [(set (match_operand:V4SF 0 "register_operand" "=x,v")
(vec_merge:V4SF
- (match_operand:V4SF 2 "register_operand" " x,x")
- (match_operand:V4SF 1 "register_operand" " 0,x")
+ (match_operand:V4SF 2 "register_operand" " x,v")
+ (match_operand:V4SF 1 "register_operand" " 0,v")
(const_int 1)))]
"TARGET_SSE"
"@
@@ -6372,31 +6414,31 @@
vmovss\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "SF")])
(define_insn "avx2_vec_dup<mode>"
- [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
+ [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
(vec_duplicate:VF1_128_256
(vec_select:SF
- (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "v")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"vbroadcastss\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "<MODE>")])
(define_insn "avx2_vec_dupv8sf_1"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_duplicate:V8SF
(vec_select:SF
- (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 1 "register_operand" "v")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"vbroadcastss\t{%x1, %0|%0, %x1}"
[(set_attr "type" "sselog1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V8SF")])
(define_insn "avx512f_vec_dup<mode>_1"
@@ -6415,12 +6457,12 @@
;; unpcklps with register source since it is shorter.
(define_insn "*vec_concatv2sf_sse4_1"
[(set (match_operand:V2SF 0 "register_operand"
- "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
+ "=Yr,*x,v,Yr,*x,v,v,*y ,*y")
(vec_concat:V2SF
(match_operand:SF 1 "nonimmediate_operand"
- " 0, 0,x, 0,0, x,m, 0 , m")
+ " 0, 0,v, 0,0, v,m, 0 , m")
(match_operand:SF 2 "vector_move_operand"
- " Yr,*x,x, m,m, m,C,*ym, C")))]
+ " Yr,*x,v, m,m, m,C,*ym, C")))]
"TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
unpcklps\t{%2, %0|%0, %2}
@@ -6437,7 +6479,7 @@
(set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
(set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
(set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
- (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
+ (set_attr "prefix" "orig,orig,maybe_evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
(set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
;; ??? In theory we can match memory for the MMX alternative, but allowing
@@ -6458,10 +6500,10 @@
(set_attr "mode" "V4SF,SF,DI,DI")])
(define_insn "*vec_concatv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
+ [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
(vec_concat:V4SF
- (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
- (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
+ (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
"TARGET_SSE"
"@
movlhps\t{%2, %0|%0, %2}
@@ -6470,7 +6512,7 @@
vmovhps\t{%2, %1, %0|%0, %1, %q2}"
[(set_attr "isa" "noavx,avx,noavx,avx")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,vex,orig,vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
(define_expand "vec_init<mode>"
@@ -6613,9 +6655,9 @@
})
(define_insn_and_split "*vec_extractv4sf_0"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
(parallel [(const_int 0)])))]
"TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"#"
@@ -6624,9 +6666,9 @@
"operands[1] = gen_lowpart (SFmode, operands[1]);")
(define_insn_and_split "*sse4_1_extractps"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,v,v")
(vec_select:SF
- (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
+ (match_operand:V4SF 1 "register_operand" "Yr,*v,0,v")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
"TARGET_SSE4_1"
"@
@@ -6665,7 +6707,7 @@
(set_attr "mode" "V4SF,V4SF,*,*")])
(define_insn_and_split "*vec_extractv4sf_mem"
- [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
+ [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
(vec_select:SF
(match_operand:V4SF 1 "memory_operand" "o,o,o")
(parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
@@ -7239,9 +7281,9 @@
(set_attr "mode" "XI")])
(define_insn_and_split "vec_extract_lo_v16hi"
- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
(vec_select:V8HI
- (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
+ (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
@@ -7253,20 +7295,27 @@
"operands[1] = gen_lowpart (V8HImode, operands[1]);")
(define_insn "vec_extract_hi_v16hi"
- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
(vec_select:V8HI
- (match_operand:V16HI 1 "register_operand" "x,x")
+ (match_operand:V16HI 1 "register_operand" "x,x,v,v,v,v")
(parallel [(const_int 8) (const_int 9)
(const_int 10) (const_int 11)
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)])))]
"TARGET_AVX"
- "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ "@
+ vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
+ vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "memory" "none,store")
- (set_attr "prefix" "vex")
+ (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
+ (set_attr "memory" "none,store,none,store,none,store")
+ (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
(set_attr "mode" "OI")])
(define_insn_and_split "vec_extract_lo_v64qi"
@@ -7325,9 +7374,9 @@
(set_attr "mode" "XI")])
(define_insn_and_split "vec_extract_lo_v32qi"
- [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
(vec_select:V16QI
- (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
+ (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
@@ -7343,9 +7392,9 @@
"operands[1] = gen_lowpart (V16QImode, operands[1]);")
(define_insn "vec_extract_hi_v32qi"
- [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
(vec_select:V16QI
- (match_operand:V32QI 1 "register_operand" "x,x")
+ (match_operand:V32QI 1 "register_operand" "x,x,v,v,v,v")
(parallel [(const_int 16) (const_int 17)
(const_int 18) (const_int 19)
(const_int 20) (const_int 21)
@@ -7355,12 +7404,19 @@
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
"TARGET_AVX"
- "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ "@
+ vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
+ vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "memory" "none,store")
- (set_attr "prefix" "vex")
+ (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
+ (set_attr "memory" "none,store,none,store,none,store")
+ (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
(set_attr "mode" "OI")])
;; Modes handled by vec_extract patterns.
@@ -7424,8 +7480,8 @@
[(set (match_dup 3)
(vec_select:V4DF
(vec_concat:V8DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DF 1 "register_operand")
+ (match_operand:V4DF 2 "nonimmediate_operand"))
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))
(set (match_dup 4)
@@ -7480,11 +7536,11 @@
})
(define_insn "*vec_interleave_highv2df"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
(vec_select:V2DF
(vec_concat:V4DF
- (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
- (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
(parallel [(const_int 1)
(const_int 3)])))]
"TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
@@ -7498,7 +7554,7 @@
[(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
(set_attr "prefix_data16" "*,*,*,1,*,1")
- (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
(define_expand "avx512f_movddup512<mask_name>"
@@ -7584,8 +7640,8 @@
[(set (match_dup 3)
(vec_select:V4DF
(vec_concat:V8DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DF 1 "register_operand")
+ (match_operand:V4DF 2 "nonimmediate_operand"))
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))
(set (match_dup 4)
@@ -7639,11 +7695,11 @@
})
(define_insn "*vec_interleave_lowv2df"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
(vec_select:V2DF
(vec_concat:V4DF
- (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
- (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
(parallel [(const_int 0)
(const_int 2)])))]
"TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
@@ -7657,7 +7713,7 @@
[(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
(set_attr "prefix_data16" "*,*,*,1,*,1")
- (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
(define_split
@@ -8248,11 +8304,11 @@
(set_attr "mode" "TI")])
(define_insn "sse2_shufpd_<mode>"
- [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
+ [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
(vec_select:VI8F_128
(vec_concat:<ssedoublevecmode>
- (match_operand:VI8F_128 1 "register_operand" "0,x")
- (match_operand:VI8F_128 2 "vector_operand" "xBm,xm"))
+ (match_operand:VI8F_128 1 "register_operand" "0,v")
+ (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
(parallel [(match_operand 3 "const_0_to_1_operand")
(match_operand 4 "const_2_to_3_operand")])))]
"TARGET_SSE2"
@@ -8275,15 +8331,15 @@
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseshuf")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "V2DF")])
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "sse2_storehpd"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,v,x,*f,r")
(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
+ (match_operand:V2DF 1 "nonimmediate_operand" " v,0,v,o,o,o")
(parallel [(const_int 1)])))]
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
@@ -8301,7 +8357,7 @@
(not (match_test "TARGET_AVX")))
(const_string "1")
(const_string "*")))
- (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
+ (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
(set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
(define_split
@@ -8332,7 +8388,7 @@
(define_insn "sse2_storelpd"
[(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
+ (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
(parallel [(const_int 0)])))]
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
@@ -8393,14 +8449,14 @@
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "sse2_loadhpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand"
- "=x,x,x,x,o,o ,o")
+ "=x,v,x,v,o,o ,o")
(vec_concat:V2DF
(vec_select:DF
(match_operand:V2DF 1 "nonimmediate_operand"
- " 0,x,0,x,0,0 ,0")
+ " 0,v,0,v,0,0 ,0")
(parallel [(const_int 0)]))
(match_operand:DF 2 "nonimmediate_operand"
- " m,m,x,x,x,*f,r")))]
+ " m,m,x,v,x,*f,r")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
movhpd\t{%2, %0|%0, %2}
@@ -8413,7 +8469,7 @@
[(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
(set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
(set_attr "prefix_data16" "1,*,*,*,*,*,*")
- (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
(set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
(define_split
@@ -8449,13 +8505,13 @@
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "sse2_loadlpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand"
- "=x,x,x,x,x,x,x,x,m,m ,m")
+ "=v,x,v,x,v,x,x,v,m,m ,m")
(vec_concat:V2DF
(match_operand:DF 2 "nonimmediate_operand"
- "xm,m,m,x,x,0,0,x,x,*f,r")
+ "vm,m,m,x,v,0,0,v,x,*f,r")
(vec_select:DF
(match_operand:V2DF 1 "vector_move_operand"
- " C,0,x,0,x,x,o,o,0,0 ,0")
+ " C,0,v,0,v,x,o,o,0,0 ,0")
(parallel [(const_int 1)]))))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
@@ -8482,7 +8538,7 @@
(const_string "ssemov")))
(set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
(set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
- (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
+ (set_attr "prefix" "maybe_vex,orig,maybe_evex,orig,maybe_evex,orig,orig,maybe_evex,*,*,*")
(set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
(define_split
@@ -8495,10 +8551,10 @@
"operands[0] = adjust_address (operands[0], DFmode, 0);")
(define_insn "sse2_movsd"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
(vec_merge:V2DF
- (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
- (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
(const_int 1)))]
"TARGET_SSE2"
"@
@@ -8524,7 +8580,7 @@
(const_string "1")
(const_string "*")))
(set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex,orig,orig,maybe_evex,maybe_vex")
(set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
(define_insn "vec_dupv2df<mask_name>"
@@ -9803,19 +9859,19 @@
"ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
(define_insn "*avx2_pmaddwd"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+ [(set (match_operand:V8SI 0 "register_operand" "=x,v")
(plus:V8SI
(mult:V8SI
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 1 "nonimmediate_operand" "%x")
+ (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -9836,7 +9892,8 @@
"TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
"vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseiadd")
- (set_attr "prefix" "vex")
+ (set_attr "isa" "*,avx512bw")
+ (set_attr "prefix" "vex,evex")
(set_attr "mode" "OI")])
(define_expand "sse2_pmaddwd"
@@ -9866,17 +9923,17 @@
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
(define_insn "*sse2_pmaddwd"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
(plus:V4SI
(mult:V4SI
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "vector_operand" "%0,x")
+ (match_operand:V8HI 1 "vector_operand" "%0,x,v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 2 "vector_operand" "xBm,xm")
+ (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)]))))
(mult:V4SI
@@ -9891,12 +9948,13 @@
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"@
pmaddwd\t{%2, %0|%0, %2}
+ vpmaddwd\t{%2, %1, %0|%0, %1, %2}
vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "TI")])
(define_insn "avx512dq_mul<mode>3<mask_name>"
@@ -10072,6 +10130,20 @@
DONE;
})
+(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
+ [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
+ (ashiftrt:VI24_AVX512BW_1
+ (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
+ (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+ "TARGET_AVX512VL"
+ "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "ashr<mode>3"
[(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
(ashiftrt:VI24_AVX2
@@ -10091,20 +10163,6 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
- [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
- (ashiftrt:VI24_AVX512BW_1
- (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
- (match_operand:SI 2 "nonmemory_operand" "v,N")))]
- "TARGET_AVX512VL"
- "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "type" "sseishft")
- (set (attr "length_immediate")
- (if_then_else (match_operand 2 "const_int_operand")
- (const_string "1")
- (const_string "0")))
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "<mask_codefor>ashrv2di3<mask_name>"
[(set (match_operand:V2DI 0 "register_operand" "=v,v")
(ashiftrt:V2DI
@@ -10442,19 +10500,20 @@
(set_attr "mode" "TI")])
(define_insn "*<code>v8hi3"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
(smaxmin:V8HI
- (match_operand:V8HI 1 "vector_operand" "%0,x")
- (match_operand:V8HI 2 "vector_operand" "xBm,xm")))]
+ (match_operand:V8HI 1 "vector_operand" "%0,x,v")
+ (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
"@
p<maxmin_int>w\t{%2, %0|%0, %2}
+ vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseiadd")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix_extra" "*,1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_extra" "*,1,1")
+ (set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "TI")])
(define_expand "<code><mode>3"
@@ -10526,19 +10585,20 @@
(set_attr "mode" "TI")])
(define_insn "*<code>v16qi3"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
(umaxmin:V16QI
- (match_operand:V16QI 1 "vector_operand" "%0,x")
- (match_operand:V16QI 2 "vector_operand" "xBm,xm")))]
+ (match_operand:V16QI 1 "vector_operand" "%0,x,v")
+ (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
"@
p<maxmin_int>b\t{%2, %0|%0, %2}
+ vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseiadd")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix_extra" "*,1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_extra" "*,1,1")
+ (set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
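
Throughout these sse.md hunks the change is the same: operand constraints are relaxed from "x" (%xmm0-%xmm15) to "v" (%xmm0-%xmm31; the upper sixteen require EVEX encoding, hence AVX-512VL for the 128- and 256-bit forms), the "prefix" attribute moves from "vex" to "maybe_evex" to match, and EVEX-only alternatives such as vextracti32x4 are added where the VEX mnemonic cannot reach the new registers. A minimal C sketch of the effect, assuming compilation with -mavx512vl; whether the high registers are actually chosen depends on register allocation:

    #include <immintrin.h>

    /* Matches the sse_movss pattern above: with the "v" constraint the
       allocator may place a and b in %xmm16-%xmm31 and emit the
       EVEX-encoded vmovss; the old "x" constraint forced %xmm0-%xmm15.  */
    __m128
    blend_low (__m128 a, __m128 b)
    {
      return _mm_move_ss (a, b);
    }
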
diff --git a/gcc/config/mips/constraints.md b/gcc/config/mips/constraints.md
index 133e346a8fc..56b363e699b 100644
--- a/gcc/config/mips/constraints.md
+++ b/gcc/config/mips/constraints.md
@@ -308,6 +308,61 @@
"@internal"
(match_operand 0 "low_bitmask_operand"))
+(define_constraint "YI"
+ "@internal
+ A replicated vector const in which the replicated value is in the range
+ [-512,511]."
+ (and (match_code "const_vector")
+ (match_test "mips_const_vector_same_int_p (op, mode, -512, 511)")))
+
+(define_constraint "YC"
+ "@internal
+ A replicated vector const in which the replicated value has a single
+ bit set."
+ (and (match_code "const_vector")
+ (match_test "mips_const_vector_bitimm_set_p (op, mode)")))
+
+(define_constraint "YZ"
+ "@internal
+ A replicated vector const in which the replicated value has a single
+ bit clear."
+ (and (match_code "const_vector")
+ (match_test "mips_const_vector_bitimm_clr_p (op, mode)")))
+
+(define_constraint "Unv5"
+ "@internal
+ A replicated vector const in which the replicated value is in the range
+ [-31,0]."
+ (and (match_code "const_vector")
+ (match_test "mips_const_vector_same_int_p (op, mode, -31, 0)")))
+
+(define_constraint "Uuv5"
+ "@internal
+ A replicated vector const in which the replicated value is in the range
+ [0,31]."
+ (and (match_code "const_vector")
+ (match_test "mips_const_vector_same_int_p (op, mode, 0, 31)")))
+
+(define_constraint "Usv5"
+ "@internal
+ A replicated vector const in which the replicated value is in the range
+ [-16,15]."
+ (and (match_code "const_vector")
+ (match_test "mips_const_vector_same_int_p (op, mode, -16, 15)")))
+
+(define_constraint "Uuv6"
+ "@internal
+ A replicated vector const in which the replicated value is in the range
+ [0,63]."
+ (and (match_code "const_vector")
+ (match_test "mips_const_vector_same_int_p (op, mode, 0, 63)")))
+
+(define_constraint "Urv8"
+ "@internal
+   A replicated vector const in which the replicated element is itself
+   a replicated byte."
+ (and (match_code "const_vector")
+ (match_test "mips_const_vector_same_bytes_p (op, mode)")))
+
(define_memory_constraint "ZC"
"A memory operand whose address is formed by a base register and offset
that is suitable for use in instructions with the same addressing mode
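
Each new constraint pairs a const_vector match_code with a target predicate. A sketch of what a check like mips_const_vector_same_int_p has to do, inferred from the constraint documentation rather than copied from mips.c:

    /* Return true if OP is a CONST_VECTOR of mode MODE whose elements
       are all the same CONST_INT in [MINVAL, MAXVAL].  */
    static bool
    const_vector_same_int_p (rtx op, machine_mode mode,
                             HOST_WIDE_INT minval, HOST_WIDE_INT maxval)
    {
      int i, nunits = GET_MODE_NUNITS (mode);
      rtx first = CONST_VECTOR_ELT (op, 0);

      if (!CONST_INT_P (first)
          || !IN_RANGE (INTVAL (first), minval, maxval))
        return false;

      for (i = 1; i < nunits; i++)
        if (!rtx_equal_p (CONST_VECTOR_ELT (op, i), first))
          return false;

      return true;
    }
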
diff --git a/gcc/config/mips/i6400.md b/gcc/config/mips/i6400.md
index 0f8230d6426..99881a79353 100644
--- a/gcc/config/mips/i6400.md
+++ b/gcc/config/mips/i6400.md
@@ -26,10 +26,13 @@
(define_cpu_unit "i6400_control, i6400_ctu, i6400_alu0" "i6400_int_pipe")
;; Short FPU pipeline.
-(define_cpu_unit "i6400_fpu_short" "i6400_fpu_short_pipe")
+(define_cpu_unit "i6400_fpu_short, i6400_fpu_intadd, i6400_fpu_logic,
+ i6400_fpu_div, i6400_fpu_cmp, i6400_fpu_float,
+ i6400_fpu_store" "i6400_fpu_short_pipe")
;; Long FPU pipeline.
-(define_cpu_unit "i6400_fpu_long, i6400_fpu_apu" "i6400_fpu_long_pipe")
+(define_cpu_unit "i6400_fpu_long, i6400_fpu_logic_l, i6400_fpu_float_l,
+ i6400_fpu_mult, i6400_fpu_apu" "i6400_fpu_long_pipe")
(define_reservation "i6400_control_ctu" "i6400_control, i6400_ctu")
(define_reservation "i6400_control_alu0" "i6400_control, i6400_alu0")
@@ -37,6 +40,176 @@
(define_reservation "i6400_agen_alu1" "i6400_agen, i6400_alu1")
;;
+;; FPU-MSA pipe
+;;
+
+;; Short pipe
+
+;; addv, subv
+(define_insn_reservation "i6400_msa_add_d" 1
+ (and (eq_attr "cpu" "i6400")
+ (and (eq_attr "mode" "!V2DI")
+ (eq_attr "alu_type" "simd_add")))
+ "i6400_fpu_short, i6400_fpu_intadd")
+
+;; add, hadd, sub, hsub, average, min, max, compare
+(define_insn_reservation "i6400_msa_int_add" 2
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_int_arith"))
+ "i6400_fpu_short, i6400_fpu_intadd")
+
+;; sat, pcnt
+(define_insn_reservation "i6400_msa_short_logic3" 3
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_sat,simd_pcnt"))
+ "i6400_fpu_short, i6400_fpu_logic")
+
+;; shifts, nloc, nlzc, bneg, bclr, shf
+(define_insn_reservation "i6400_msa_short_logic2" 2
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_shift,simd_shf,simd_bit"))
+ "i6400_fpu_short, i6400_fpu_logic")
+
+;; and, or, xor, ilv, pck, fill, splat
+(define_insn_reservation "i6400_msa_short_logic" 1
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_permute,simd_logic,simd_splat,simd_fill"))
+ "i6400_fpu_short, i6400_fpu_logic")
+
+;; move.v, ldi
+(define_insn_reservation "i6400_msa_move" 1
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_move"))
+ "i6400_fpu_short, i6400_fpu_logic")
+
+;; Float compare: CMP.cond.fmt
+(define_insn_reservation "i6400_msa_cmp" 2
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_fcmp"))
+ "i6400_fpu_short, i6400_fpu_cmp")
+
+;; Float min, max, class
+(define_insn_reservation "i6400_msa_short_float2" 2
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_fminmax,simd_fclass"))
+ "i6400_fpu_short, i6400_fpu_float")
+
+;; div.d, mod.d (non-pipelined)
+(define_insn_reservation "i6400_msa_div_d" 36
+ (and (eq_attr "cpu" "i6400")
+ (and (eq_attr "mode" "V2DI")
+ (eq_attr "type" "simd_div")))
+ "i6400_fpu_short+i6400_fpu_div*36")
+
+;; div.w, mod.w (non-pipelined)
+(define_insn_reservation "i6400_msa_div_w" 20
+ (and (eq_attr "cpu" "i6400")
+ (and (eq_attr "mode" "V4SI")
+ (eq_attr "type" "simd_div")))
+ "i6400_fpu_short+i6400_fpu_div*20")
+
+;; div.h, mod.h (non-pipelined)
+(define_insn_reservation "i6400_msa_div_h" 12
+ (and (eq_attr "cpu" "i6400")
+ (and (eq_attr "mode" "V8HI")
+ (eq_attr "type" "simd_div")))
+ "i6400_fpu_short+i6400_fpu_div*12")
+
+;; div.b, mod.b (non-pipelined)
+(define_insn_reservation "i6400_msa_div_b" 8
+ (and (eq_attr "cpu" "i6400")
+ (and (eq_attr "mode" "V16QI")
+ (eq_attr "type" "simd_div")))
+ "i6400_fpu_short+i6400_fpu_div*8")
+
+;; Vector copy
+(define_insn_reservation "i6400_msa_copy" 1
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_copy"))
+ "i6400_fpu_short, i6400_fpu_store")
+
+;; Vector bz, bnz
+(define_insn_reservation "i6400_msa_branch" 1
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_branch"))
+ "i6400_control_ctu")
+
+;; Vector store
+(define_insn_reservation "i6400_fpu_msa_store" 1
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_store"))
+ "i6400_agen_lsu")
+
+;; Vector load
+(define_insn_reservation "i6400_fpu_msa_load" 3
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_load"))
+ "i6400_agen_lsu")
+
+;; cfcmsa, ctcmsa
+(define_insn_reservation "i6400_fpu_msa_move" 1
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_cmsa"))
+ "i6400_control_alu0 | i6400_agen_alu1")
+
+;; Long pipe
+
+;; bmz, bmnz, bsel, insert, insve
+(define_insn_reservation "i6400_msa_long_logic1" 1
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_bitmov,simd_insert"))
+ "i6400_fpu_long, i6400_fpu_logic_l")
+
+;; binsl, binsr, vshf, sld
+(define_insn_reservation "i6400_msa_long_logic2" 2
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_bitins,simd_sld"))
+ "i6400_fpu_long, i6400_fpu_logic_l")
+
+;; Vector mul, dotp, madd, msub
+(define_insn_reservation "i6400_msa_mult" 5
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_mul"))
+ "i6400_fpu_long, i6400_fpu_mult")
+
+;; Float flog2
+(define_insn_reservation "i6400_msa_long_float2" 2
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_flog2"))
+ "i6400_fpu_long, i6400_fpu_float_l")
+
+;; fadd, fsub
+(define_insn_reservation "i6400_msa_long_float4" 4
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_fadd,simd_fcvt"))
+ "i6400_fpu_long, i6400_fpu_float_l")
+
+;; fmul, fexp2
+(define_insn_reservation "i6400_msa_long_float5" 5
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_fmul,simd_fexp2"))
+ "i6400_fpu_long, i6400_fpu_float_l")
+
+;; fmadd, fmsub
+(define_insn_reservation "i6400_msa_long_float8" 8
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_fmadd"))
+ "i6400_fpu_long, i6400_fpu_float_l")
+
+;; fdiv.d
+(define_insn_reservation "i6400_msa_fdiv_df" 30
+ (and (eq_attr "cpu" "i6400")
+ (and (eq_attr "mode" "V2DF")
+ (eq_attr "type" "simd_fdiv")))
+ "i6400_fpu_long+i6400_fpu_float_l*30")
+
+;; fdiv.w
+(define_insn_reservation "i6400_msa_fdiv_sf" 22
+ (and (eq_attr "cpu" "i6400")
+ (eq_attr "type" "simd_fdiv"))
+ "i6400_fpu_long+i6400_fpu_float_l*22")
+
+;;
;; FPU pipe
;;
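
In each define_insn_reservation above, the number is the result latency and the reservation string names the units an instruction ties up. The divides use unit*N, so one operation occupies the divider for its whole latency; they are not pipelined. A toy model of that blocking behaviour (an illustration of the scheduling consequence, not of GCC's generated DFA):

    /* Issue a divide at CYCLE on a divider busy until *BUSY; return the
       completion cycle.  Two independent div.d ops (latency 36) finish
       at cycles 36 and 72, not 36 and 37.  */
    static int
    issue_divide (int cycle, int *busy, int latency)
    {
      int start = cycle > *busy ? cycle : *busy;
      *busy = start + latency;
      return *busy;
    }
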
diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def
index 17034f2ea95..5df9807b192 100644
--- a/gcc/config/mips/mips-cpus.def
+++ b/gcc/config/mips/mips-cpus.def
@@ -44,10 +44,7 @@ MIPS_CPU ("mips4", PROCESSOR_R10000, 4, 0)
isn't tuned to a specific processor. */
MIPS_CPU ("mips32", PROCESSOR_4KC, 32, PTF_AVOID_BRANCHLIKELY)
MIPS_CPU ("mips32r2", PROCESSOR_74KF2_1, 33, PTF_AVOID_BRANCHLIKELY)
-/* mips32r3 is micromips hense why it uses the M4K processor.
- mips32r5 should use the p5600 processor, but there is no definition
- for this yet, so in the short term we will use the same processor entry
- as mips32r2. */
+/* mips32r3 is micromips, hence it uses the M4K processor. */
MIPS_CPU ("mips32r3", PROCESSOR_M4K, 34, PTF_AVOID_BRANCHLIKELY)
MIPS_CPU ("mips32r5", PROCESSOR_P5600, 36, PTF_AVOID_BRANCHLIKELY)
MIPS_CPU ("mips32r6", PROCESSOR_I6400, 37, 0)
@@ -150,7 +147,8 @@ MIPS_CPU ("1004kf1_1", PROCESSOR_24KF1_1, 33, 0)
MIPS_CPU ("interaptiv", PROCESSOR_24KF2_1, 33, 0)
/* MIPS32 Release 5 processors. */
-MIPS_CPU ("p5600", PROCESSOR_P5600, 36, PTF_AVOID_BRANCHLIKELY)
+MIPS_CPU ("p5600", PROCESSOR_P5600, 36, PTF_AVOID_BRANCHLIKELY
+ | PTF_AVOID_IMADD)
MIPS_CPU ("m5100", PROCESSOR_M5100, 36, PTF_AVOID_BRANCHLIKELY)
MIPS_CPU ("m5101", PROCESSOR_M5100, 36, PTF_AVOID_BRANCHLIKELY)
diff --git a/gcc/config/mips/mips-ftypes.def b/gcc/config/mips/mips-ftypes.def
index 7fe1c06eb00..69cf4379ed1 100644
--- a/gcc/config/mips/mips-ftypes.def
+++ b/gcc/config/mips/mips-ftypes.def
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see
Please keep this list lexicographically sorted by the LIST argument. */
DEF_MIPS_FTYPE (1, (DF, DF))
DEF_MIPS_FTYPE (2, (DF, DF, DF))
+DEF_MIPS_FTYPE (1, (DF, V2DF))
DEF_MIPS_FTYPE (2, (DI, DI, DI))
DEF_MIPS_FTYPE (2, (DI, DI, SI))
@@ -45,6 +46,7 @@ DEF_MIPS_FTYPE (3, (DI, DI, V4QI, V4QI))
DEF_MIPS_FTYPE (2, (DI, POINTER, SI))
DEF_MIPS_FTYPE (2, (DI, SI, SI))
DEF_MIPS_FTYPE (2, (DI, USI, USI))
+DEF_MIPS_FTYPE (2, (DI, V2DI, UQI))
DEF_MIPS_FTYPE (2, (INT, DF, DF))
DEF_MIPS_FTYPE (2, (INT, SF, SF))
@@ -54,23 +56,51 @@ DEF_MIPS_FTYPE (4, (INT, V2SF, V2SF, V2SF, V2SF))
DEF_MIPS_FTYPE (1, (SF, SF))
DEF_MIPS_FTYPE (2, (SF, SF, SF))
DEF_MIPS_FTYPE (1, (SF, V2SF))
+DEF_MIPS_FTYPE (1, (SF, V4SF))
DEF_MIPS_FTYPE (2, (SI, DI, SI))
DEF_MIPS_FTYPE (2, (SI, POINTER, SI))
DEF_MIPS_FTYPE (1, (SI, SI))
DEF_MIPS_FTYPE (2, (SI, SI, SI))
DEF_MIPS_FTYPE (3, (SI, SI, SI, SI))
+DEF_MIPS_FTYPE (1, (SI, UQI))
+DEF_MIPS_FTYPE (1, (SI, UV16QI))
+DEF_MIPS_FTYPE (1, (SI, UV2DI))
+DEF_MIPS_FTYPE (1, (SI, UV4SI))
+DEF_MIPS_FTYPE (1, (SI, UV8HI))
+DEF_MIPS_FTYPE (2, (SI, V16QI, UQI))
DEF_MIPS_FTYPE (1, (SI, V2HI))
DEF_MIPS_FTYPE (2, (SI, V2HI, V2HI))
DEF_MIPS_FTYPE (1, (SI, V4QI))
DEF_MIPS_FTYPE (2, (SI, V4QI, V4QI))
+DEF_MIPS_FTYPE (2, (SI, V4SI, UQI))
+DEF_MIPS_FTYPE (2, (SI, V8HI, UQI))
DEF_MIPS_FTYPE (1, (SI, VOID))
DEF_MIPS_FTYPE (2, (UDI, UDI, UDI))
DEF_MIPS_FTYPE (2, (UDI, UV2SI, UV2SI))
+DEF_MIPS_FTYPE (2, (UDI, V2DI, UQI))
+DEF_MIPS_FTYPE (2, (USI, V16QI, UQI))
+DEF_MIPS_FTYPE (2, (USI, V4SI, UQI))
+DEF_MIPS_FTYPE (2, (USI, V8HI, UQI))
DEF_MIPS_FTYPE (1, (USI, VOID))
+DEF_MIPS_FTYPE (2, (UV16QI, UV16QI, UQI))
+DEF_MIPS_FTYPE (2, (UV16QI, UV16QI, UV16QI))
+DEF_MIPS_FTYPE (3, (UV16QI, UV16QI, UV16QI, UQI))
+DEF_MIPS_FTYPE (3, (UV16QI, UV16QI, UV16QI, UV16QI))
+DEF_MIPS_FTYPE (2, (UV16QI, UV16QI, V16QI))
+
+DEF_MIPS_FTYPE (2, (UV2DI, UV2DI, UQI))
+DEF_MIPS_FTYPE (2, (UV2DI, UV2DI, UV2DI))
+DEF_MIPS_FTYPE (3, (UV2DI, UV2DI, UV2DI, UQI))
+DEF_MIPS_FTYPE (3, (UV2DI, UV2DI, UV2DI, UV2DI))
+DEF_MIPS_FTYPE (3, (UV2DI, UV2DI, UV4SI, UV4SI))
+DEF_MIPS_FTYPE (2, (UV2DI, UV2DI, V2DI))
+DEF_MIPS_FTYPE (2, (UV2DI, UV4SI, UV4SI))
+DEF_MIPS_FTYPE (1, (UV2DI, V2DF))
+
DEF_MIPS_FTYPE (2, (UV2SI, UV2SI, UQI))
DEF_MIPS_FTYPE (2, (UV2SI, UV2SI, UV2SI))
@@ -82,10 +112,75 @@ DEF_MIPS_FTYPE (3, (UV4HI, UV4HI, UV4HI, USI))
DEF_MIPS_FTYPE (1, (UV4HI, UV8QI))
DEF_MIPS_FTYPE (2, (UV4HI, UV8QI, UV8QI))
+DEF_MIPS_FTYPE (2, (UV4SI, UV4SI, UQI))
+DEF_MIPS_FTYPE (2, (UV4SI, UV4SI, UV4SI))
+DEF_MIPS_FTYPE (3, (UV4SI, UV4SI, UV4SI, UQI))
+DEF_MIPS_FTYPE (3, (UV4SI, UV4SI, UV4SI, UV4SI))
+DEF_MIPS_FTYPE (3, (UV4SI, UV4SI, UV8HI, UV8HI))
+DEF_MIPS_FTYPE (2, (UV4SI, UV4SI, V4SI))
+DEF_MIPS_FTYPE (2, (UV4SI, UV8HI, UV8HI))
+DEF_MIPS_FTYPE (1, (UV4SI, V4SF))
+
+DEF_MIPS_FTYPE (2, (UV8HI, UV16QI, UV16QI))
+DEF_MIPS_FTYPE (2, (UV8HI, UV8HI, UQI))
+DEF_MIPS_FTYPE (3, (UV8HI, UV8HI, UV16QI, UV16QI))
+DEF_MIPS_FTYPE (2, (UV8HI, UV8HI, UV8HI))
+DEF_MIPS_FTYPE (3, (UV8HI, UV8HI, UV8HI, UQI))
+DEF_MIPS_FTYPE (3, (UV8HI, UV8HI, UV8HI, UV8HI))
+DEF_MIPS_FTYPE (2, (UV8HI, UV8HI, V8HI))
+
DEF_MIPS_FTYPE (2, (UV8QI, UV4HI, UV4HI))
DEF_MIPS_FTYPE (1, (UV8QI, UV8QI))
DEF_MIPS_FTYPE (2, (UV8QI, UV8QI, UV8QI))
+DEF_MIPS_FTYPE (2, (V16QI, CVPOINTER, SI))
+DEF_MIPS_FTYPE (1, (V16QI, HI))
+DEF_MIPS_FTYPE (1, (V16QI, SI))
+DEF_MIPS_FTYPE (2, (V16QI, UV16QI, UQI))
+DEF_MIPS_FTYPE (2, (V16QI, UV16QI, UV16QI))
+DEF_MIPS_FTYPE (1, (V16QI, V16QI))
+DEF_MIPS_FTYPE (2, (V16QI, V16QI, QI))
+DEF_MIPS_FTYPE (2, (V16QI, V16QI, SI))
+DEF_MIPS_FTYPE (2, (V16QI, V16QI, UQI))
+DEF_MIPS_FTYPE (3, (V16QI, V16QI, UQI, SI))
+DEF_MIPS_FTYPE (3, (V16QI, V16QI, UQI, V16QI))
+DEF_MIPS_FTYPE (2, (V16QI, V16QI, V16QI))
+DEF_MIPS_FTYPE (3, (V16QI, V16QI, V16QI, SI))
+DEF_MIPS_FTYPE (3, (V16QI, V16QI, V16QI, UQI))
+DEF_MIPS_FTYPE (3, (V16QI, V16QI, V16QI, V16QI))
+
+DEF_MIPS_FTYPE (1, (V2DF, DF))
+DEF_MIPS_FTYPE (1, (V2DF, UV2DI))
+DEF_MIPS_FTYPE (1, (V2DF, V2DF))
+DEF_MIPS_FTYPE (2, (V2DF, V2DF, V2DF))
+DEF_MIPS_FTYPE (3, (V2DF, V2DF, V2DF, V2DF))
+DEF_MIPS_FTYPE (2, (V2DF, V2DF, V2DI))
+DEF_MIPS_FTYPE (1, (V2DF, V2DI))
+DEF_MIPS_FTYPE (1, (V2DF, V4SF))
+DEF_MIPS_FTYPE (1, (V2DF, V4SI))
+
+DEF_MIPS_FTYPE (2, (V2DI, CVPOINTER, SI))
+DEF_MIPS_FTYPE (1, (V2DI, DI))
+DEF_MIPS_FTYPE (1, (V2DI, HI))
+DEF_MIPS_FTYPE (2, (V2DI, UV2DI, UQI))
+DEF_MIPS_FTYPE (2, (V2DI, UV2DI, UV2DI))
+DEF_MIPS_FTYPE (2, (V2DI, UV4SI, UV4SI))
+DEF_MIPS_FTYPE (1, (V2DI, V2DF))
+DEF_MIPS_FTYPE (2, (V2DI, V2DF, V2DF))
+DEF_MIPS_FTYPE (1, (V2DI, V2DI))
+DEF_MIPS_FTYPE (2, (V2DI, V2DI, QI))
+DEF_MIPS_FTYPE (2, (V2DI, V2DI, SI))
+DEF_MIPS_FTYPE (2, (V2DI, V2DI, UQI))
+DEF_MIPS_FTYPE (3, (V2DI, V2DI, UQI, DI))
+DEF_MIPS_FTYPE (3, (V2DI, V2DI, UQI, V2DI))
+DEF_MIPS_FTYPE (3, (V2DI, V2DI, UV4SI, UV4SI))
+DEF_MIPS_FTYPE (2, (V2DI, V2DI, V2DI))
+DEF_MIPS_FTYPE (3, (V2DI, V2DI, V2DI, SI))
+DEF_MIPS_FTYPE (3, (V2DI, V2DI, V2DI, UQI))
+DEF_MIPS_FTYPE (3, (V2DI, V2DI, V2DI, V2DI))
+DEF_MIPS_FTYPE (3, (V2DI, V2DI, V4SI, V4SI))
+DEF_MIPS_FTYPE (2, (V2DI, V4SI, V4SI))
+
DEF_MIPS_FTYPE (1, (V2HI, SI))
DEF_MIPS_FTYPE (2, (V2HI, SI, SI))
DEF_MIPS_FTYPE (3, (V2HI, SI, SI, SI))
@@ -118,12 +213,74 @@ DEF_MIPS_FTYPE (1, (V4QI, V4QI))
DEF_MIPS_FTYPE (2, (V4QI, V4QI, SI))
DEF_MIPS_FTYPE (2, (V4QI, V4QI, V4QI))
+DEF_MIPS_FTYPE (1, (V4SF, SF))
+DEF_MIPS_FTYPE (1, (V4SF, UV4SI))
+DEF_MIPS_FTYPE (2, (V4SF, V2DF, V2DF))
+DEF_MIPS_FTYPE (1, (V4SF, V4SF))
+DEF_MIPS_FTYPE (2, (V4SF, V4SF, V4SF))
+DEF_MIPS_FTYPE (3, (V4SF, V4SF, V4SF, V4SF))
+DEF_MIPS_FTYPE (2, (V4SF, V4SF, V4SI))
+DEF_MIPS_FTYPE (1, (V4SF, V4SI))
+DEF_MIPS_FTYPE (1, (V4SF, V8HI))
+
+DEF_MIPS_FTYPE (2, (V4SI, CVPOINTER, SI))
+DEF_MIPS_FTYPE (1, (V4SI, HI))
+DEF_MIPS_FTYPE (1, (V4SI, SI))
+DEF_MIPS_FTYPE (2, (V4SI, UV4SI, UQI))
+DEF_MIPS_FTYPE (2, (V4SI, UV4SI, UV4SI))
+DEF_MIPS_FTYPE (2, (V4SI, UV8HI, UV8HI))
+DEF_MIPS_FTYPE (2, (V4SI, V2DF, V2DF))
+DEF_MIPS_FTYPE (1, (V4SI, V4SF))
+DEF_MIPS_FTYPE (2, (V4SI, V4SF, V4SF))
+DEF_MIPS_FTYPE (1, (V4SI, V4SI))
+DEF_MIPS_FTYPE (2, (V4SI, V4SI, QI))
+DEF_MIPS_FTYPE (2, (V4SI, V4SI, SI))
+DEF_MIPS_FTYPE (2, (V4SI, V4SI, UQI))
+DEF_MIPS_FTYPE (3, (V4SI, V4SI, UQI, SI))
+DEF_MIPS_FTYPE (3, (V4SI, V4SI, UQI, V4SI))
+DEF_MIPS_FTYPE (3, (V4SI, V4SI, UV8HI, UV8HI))
+DEF_MIPS_FTYPE (2, (V4SI, V4SI, V4SI))
+DEF_MIPS_FTYPE (3, (V4SI, V4SI, V4SI, SI))
+DEF_MIPS_FTYPE (3, (V4SI, V4SI, V4SI, UQI))
+DEF_MIPS_FTYPE (3, (V4SI, V4SI, V4SI, V4SI))
+DEF_MIPS_FTYPE (3, (V4SI, V4SI, V8HI, V8HI))
+DEF_MIPS_FTYPE (2, (V4SI, V8HI, V8HI))
+
+DEF_MIPS_FTYPE (2, (V8HI, CVPOINTER, SI))
+DEF_MIPS_FTYPE (1, (V8HI, HI))
+DEF_MIPS_FTYPE (1, (V8HI, SI))
+DEF_MIPS_FTYPE (2, (V8HI, UV16QI, UV16QI))
+DEF_MIPS_FTYPE (2, (V8HI, UV8HI, UQI))
+DEF_MIPS_FTYPE (2, (V8HI, UV8HI, UV8HI))
+DEF_MIPS_FTYPE (2, (V8HI, V16QI, V16QI))
+DEF_MIPS_FTYPE (2, (V8HI, V4SF, V4SF))
+DEF_MIPS_FTYPE (1, (V8HI, V8HI))
+DEF_MIPS_FTYPE (2, (V8HI, V8HI, QI))
+DEF_MIPS_FTYPE (2, (V8HI, V8HI, SI))
+DEF_MIPS_FTYPE (3, (V8HI, V8HI, SI, UQI))
+DEF_MIPS_FTYPE (2, (V8HI, V8HI, UQI))
+DEF_MIPS_FTYPE (3, (V8HI, V8HI, UQI, SI))
+DEF_MIPS_FTYPE (3, (V8HI, V8HI, UQI, V8HI))
+DEF_MIPS_FTYPE (3, (V8HI, V8HI, UV16QI, UV16QI))
+DEF_MIPS_FTYPE (3, (V8HI, V8HI, V16QI, V16QI))
+DEF_MIPS_FTYPE (2, (V8HI, V8HI, V8HI))
+DEF_MIPS_FTYPE (3, (V8HI, V8HI, V8HI, SI))
+DEF_MIPS_FTYPE (3, (V8HI, V8HI, V8HI, UQI))
+DEF_MIPS_FTYPE (3, (V8HI, V8HI, V8HI, V8HI))
+
DEF_MIPS_FTYPE (2, (V8QI, V4HI, V4HI))
DEF_MIPS_FTYPE (1, (V8QI, V8QI))
DEF_MIPS_FTYPE (2, (V8QI, V8QI, V8QI))
DEF_MIPS_FTYPE (2, (VOID, SI, CVPOINTER))
DEF_MIPS_FTYPE (2, (VOID, SI, SI))
+DEF_MIPS_FTYPE (2, (VOID, UQI, SI))
DEF_MIPS_FTYPE (1, (VOID, USI))
+DEF_MIPS_FTYPE (3, (VOID, V16QI, CVPOINTER, SI))
+DEF_MIPS_FTYPE (3, (VOID, V2DF, POINTER, SI))
+DEF_MIPS_FTYPE (3, (VOID, V2DI, CVPOINTER, SI))
DEF_MIPS_FTYPE (2, (VOID, V2HI, V2HI))
DEF_MIPS_FTYPE (2, (VOID, V4QI, V4QI))
+DEF_MIPS_FTYPE (3, (VOID, V4SF, POINTER, SI))
+DEF_MIPS_FTYPE (3, (VOID, V4SI, CVPOINTER, SI))
+DEF_MIPS_FTYPE (3, (VOID, V8HI, CVPOINTER, SI))
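
Each DEF_MIPS_FTYPE entry is (argument count, (return type, argument types...)) and describes one builtin signature; CVPOINTER is a const-qualified pointer and the trailing SI an immediate offset. The (V16QI, CVPOINTER, SI) signature, for instance, fits the byte vector load. A usage sketch, assuming the msa.h header added by this patch and the builtin name that the msa_ld_<msafmt_f> pattern below suggests:

    #include <msa.h>

    /* ld.b: load 16 bytes from addr + 0.  The second argument must be
       an immediate offset (the aq10b range).  */
    v16i8
    load_bytes (const void *addr)
    {
      return __builtin_msa_ld_b (addr, 0);
    }
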
diff --git a/gcc/config/mips/mips-modes.def b/gcc/config/mips/mips-modes.def
index 08d713243d9..b21f5d16c95 100644
--- a/gcc/config/mips/mips-modes.def
+++ b/gcc/config/mips/mips-modes.def
@@ -24,11 +24,17 @@ VECTOR_MODES (INT, 4); /* V4QI V2HI */
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+/* 128-bit modes for MIPS MSA.  */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (FLOAT, 16); /* V4SF V2DF */
+
/* Double-sized vector modes for vec_concat. */
-VECTOR_MODE (INT, QI, 16); /* V16QI */
-VECTOR_MODE (INT, HI, 8); /* V8HI */
-VECTOR_MODE (INT, SI, 4); /* V4SI */
-VECTOR_MODE (FLOAT, SF, 4); /* V4SF */
+VECTOR_MODE (INT, QI, 32); /* V32QI */
+VECTOR_MODE (INT, HI, 16); /* V16HI */
+VECTOR_MODE (INT, SI, 8); /* V8SI */
+VECTOR_MODE (INT, DI, 4); /* V4DI */
+VECTOR_MODE (FLOAT, SF, 8); /* V8SF */
+VECTOR_MODE (FLOAT, DF, 4); /* V4DF */
VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */
VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
new file mode 100644
index 00000000000..1082856dd98
--- /dev/null
+++ b/gcc/config/mips/mips-msa.md
@@ -0,0 +1,2736 @@
+;; Machine Description for MIPS MSA ASE
+;; Based on the MIPS MSA spec Revision 1.11 8/4/2014
+;;
+;; Copyright (C) 2015 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+
+(define_c_enum "unspec" [
+ UNSPEC_MSA_ASUB_S
+ UNSPEC_MSA_ASUB_U
+ UNSPEC_MSA_AVE_S
+ UNSPEC_MSA_AVE_U
+ UNSPEC_MSA_AVER_S
+ UNSPEC_MSA_AVER_U
+ UNSPEC_MSA_BCLR
+ UNSPEC_MSA_BCLRI
+ UNSPEC_MSA_BINSL
+ UNSPEC_MSA_BINSLI
+ UNSPEC_MSA_BINSR
+ UNSPEC_MSA_BINSRI
+ UNSPEC_MSA_BNEG
+ UNSPEC_MSA_BNEGI
+ UNSPEC_MSA_BSET
+ UNSPEC_MSA_BSETI
+ UNSPEC_MSA_BRANCH_V
+ UNSPEC_MSA_BRANCH
+ UNSPEC_MSA_CFCMSA
+ UNSPEC_MSA_CTCMSA
+ UNSPEC_MSA_FCAF
+ UNSPEC_MSA_FCLASS
+ UNSPEC_MSA_FCUNE
+ UNSPEC_MSA_FEXDO
+ UNSPEC_MSA_FEXP2
+ UNSPEC_MSA_FEXUPL
+ UNSPEC_MSA_FEXUPR
+ UNSPEC_MSA_FFQL
+ UNSPEC_MSA_FFQR
+ UNSPEC_MSA_FLOG2
+ UNSPEC_MSA_FRCP
+ UNSPEC_MSA_FRINT
+ UNSPEC_MSA_FRSQRT
+ UNSPEC_MSA_FSAF
+ UNSPEC_MSA_FSEQ
+ UNSPEC_MSA_FSLE
+ UNSPEC_MSA_FSLT
+ UNSPEC_MSA_FSNE
+ UNSPEC_MSA_FSOR
+ UNSPEC_MSA_FSUEQ
+ UNSPEC_MSA_FSULE
+ UNSPEC_MSA_FSULT
+ UNSPEC_MSA_FSUN
+ UNSPEC_MSA_FSUNE
+ UNSPEC_MSA_FTINT_S
+ UNSPEC_MSA_FTINT_U
+ UNSPEC_MSA_FTQ
+ UNSPEC_MSA_MADD_Q
+ UNSPEC_MSA_MADDR_Q
+ UNSPEC_MSA_MSUB_Q
+ UNSPEC_MSA_MSUBR_Q
+ UNSPEC_MSA_MUL_Q
+ UNSPEC_MSA_MULR_Q
+ UNSPEC_MSA_NLOC
+ UNSPEC_MSA_SAT_S
+ UNSPEC_MSA_SAT_U
+ UNSPEC_MSA_SLD
+ UNSPEC_MSA_SLDI
+ UNSPEC_MSA_SPLAT
+ UNSPEC_MSA_SPLATI
+ UNSPEC_MSA_SRAR
+ UNSPEC_MSA_SRARI
+ UNSPEC_MSA_SRLR
+ UNSPEC_MSA_SRLRI
+ UNSPEC_MSA_SUBS_S
+ UNSPEC_MSA_SUBS_U
+ UNSPEC_MSA_SUBSUU_S
+ UNSPEC_MSA_SUBSUS_U
+ UNSPEC_MSA_VSHF
+])
+
+;; All vector modes with 128 bits.
+(define_mode_iterator MSA [V2DF V4SF V2DI V4SI V8HI V16QI])
+
+;; Same as MSA. Used by vcond to iterate two modes.
+(define_mode_iterator MSA_2 [V2DF V4SF V2DI V4SI V8HI V16QI])
+
+;; Only used for splitting insert_d and copy_{u,s}.d.
+(define_mode_iterator MSA_D [V2DI V2DF])
+
+;; Only used for copy_{u,s}.w.
+(define_mode_iterator MSA_W [V4SI V4SF])
+
+;; Only integer modes.
+(define_mode_iterator IMSA [V2DI V4SI V8HI V16QI])
+
+;; As IMSA but excludes V16QI.
+(define_mode_iterator IMSA_DWH [V2DI V4SI V8HI])
+
+;; As IMSA but excludes V2DI.
+(define_mode_iterator IMSA_WHB [V4SI V8HI V16QI])
+
+;; Only integer modes equal to or larger than a word.
+(define_mode_iterator IMSA_DW [V2DI V4SI])
+
+;; Only integer modes smaller than a word.
+(define_mode_iterator IMSA_HB [V8HI V16QI])
+
+;; Only integer modes for fixed-point madd_q/maddr_q.
+(define_mode_iterator IMSA_WH [V4SI V8HI])
+
+;; Only floating-point modes.
+(define_mode_iterator FMSA [V2DF V4SF])
+
+;; Only used for immediate set shuffle elements instruction.
+(define_mode_iterator MSA_WHB_W [V4SI V8HI V16QI V4SF])
+
+;; This attribute gives the integer vector mode of the same size.
+(define_mode_attr VIMODE
+ [(V2DF "V2DI")
+ (V4SF "V4SI")
+ (V2DI "V2DI")
+ (V4SI "V4SI")
+ (V8HI "V8HI")
+ (V16QI "V16QI")])
+
+;; This attribute gives the vector mode with half-width elements.
+(define_mode_attr VHMODE
+ [(V8HI "V16QI")
+ (V4SI "V8HI")
+ (V2DI "V4SI")])
+
+;; This attribute gives the vector mode with double-width elements.
+(define_mode_attr VDMODE
+ [(V4SI "V2DI")
+ (V8HI "V4SI")
+ (V16QI "V8HI")])
+
+;; This attribute gives the mode with half-width elements but the same
+;; number of elements.
+(define_mode_attr VTRUNCMODE
+ [(V8HI "V8QI")
+ (V4SI "V4HI")
+ (V2DI "V2SI")])
+
+;; This attribute gives the mode of the result for "copy_s_b, copy_u_b" etc.
+(define_mode_attr VRES
+ [(V2DF "DF")
+ (V4SF "SF")
+ (V2DI "DI")
+ (V4SI "SI")
+ (V8HI "SI")
+ (V16QI "SI")])
+
+;; Only used with MSA_D iterator.
+(define_mode_attr msa_d
+ [(V2DI "reg_or_0")
+ (V2DF "register")])
+
+;; This attribute gives the integer vector mode of the same size, in
+;; lower case.
+(define_mode_attr mode_i
+ [(V2DF "v2di")
+ (V4SF "v4si")
+ (V2DI "v2di")
+ (V4SI "v4si")
+ (V8HI "v8hi")
+ (V16QI "v16qi")])
+
+;; This attribute gives the suffix for MSA instructions.
+(define_mode_attr msafmt
+ [(V2DF "d")
+ (V4SF "w")
+ (V2DI "d")
+ (V4SI "w")
+ (V8HI "h")
+ (V16QI "b")])
+
+;; This attribute gives the suffix for integer modes in VHMODE.
+(define_mode_attr hmsafmt
+ [(V2DI "w")
+ (V4SI "h")
+ (V8HI "b")])
+
+;; This attribute gives the define_insn suffix for MSA instructions that
+;; need to distinguish integer from floating-point modes.
+(define_mode_attr msafmt_f
+ [(V2DF "d_f")
+ (V4SF "w_f")
+ (V2DI "d")
+ (V4SI "w")
+ (V8HI "h")
+ (V16QI "b")])
+
+;; This is used to form an immediate operand constraint using
+;; "const_<indeximm>_operand".
+(define_mode_attr indeximm
+ [(V2DF "0_or_1")
+ (V4SF "0_to_3")
+ (V2DI "0_or_1")
+ (V4SI "0_to_3")
+ (V8HI "uimm3")
+ (V16QI "uimm4")])
+
+;; This attribute gives the bitmask needed for vec_merge, using
+;; "const_<bitmask>_operand".
+(define_mode_attr bitmask
+ [(V2DF "exp_2")
+ (V4SF "exp_4")
+ (V2DI "exp_2")
+ (V4SI "exp_4")
+ (V8HI "exp_8")
+ (V16QI "exp_16")])
+
+;; This attribute is used to form an immediate operand constraint using
+;; "const_<bitimm>_operand".
+(define_mode_attr bitimm
+ [(V16QI "uimm3")
+ (V8HI "uimm4")
+ (V4SI "uimm5")
+ (V2DI "uimm6")])
+
+(define_expand "vec_init<mode>"
+ [(match_operand:MSA 0 "register_operand")
+ (match_operand:MSA 1 "")]
+ "ISA_HAS_MSA"
+{
+ mips_expand_vector_init (operands[0], operands[1]);
+ DONE;
+})
+
+;; pckev pattern with implicit type conversion.
+(define_insn "vec_pack_trunc_<mode>"
+ [(set (match_operand:<VHMODE> 0 "register_operand" "=f")
+ (vec_concat:<VHMODE>
+ (truncate:<VTRUNCMODE>
+ (match_operand:IMSA_DWH 1 "register_operand" "f"))
+ (truncate:<VTRUNCMODE>
+ (match_operand:IMSA_DWH 2 "register_operand" "f"))))]
+ "ISA_HAS_MSA"
+ "pckev.<hmsafmt>\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "vec_unpacks_hi_v4sf"
+ [(set (match_operand:V2DF 0 "register_operand" "=f")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "register_operand" "f")
+ (match_dup 2))))]
+ "ISA_HAS_MSA"
+{
+ operands[2] = mips_msa_vec_parallel_const_half (V4SFmode, true/*high_p*/);
+})
+
+(define_expand "vec_unpacks_lo_v4sf"
+ [(set (match_operand:V2DF 0 "register_operand" "=f")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "register_operand" "f")
+ (match_dup 2))))]
+ "ISA_HAS_MSA"
+{
+ operands[2] = mips_msa_vec_parallel_const_half (V4SFmode, false/*high_p*/);
+})
+
+(define_expand "vec_unpacks_hi_<mode>"
+ [(match_operand:<VDMODE> 0 "register_operand")
+ (match_operand:IMSA_WHB 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ mips_expand_vec_unpack (operands, false/*unsigned_p*/, true/*high_p*/);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_<mode>"
+ [(match_operand:<VDMODE> 0 "register_operand")
+ (match_operand:IMSA_WHB 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ mips_expand_vec_unpack (operands, false/*unsigned_p*/, false/*high_p*/);
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_<mode>"
+ [(match_operand:<VDMODE> 0 "register_operand")
+ (match_operand:IMSA_WHB 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ mips_expand_vec_unpack (operands, true/*unsigned_p*/, true/*high_p*/);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_<mode>"
+ [(match_operand:<VDMODE> 0 "register_operand")
+ (match_operand:IMSA_WHB 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ mips_expand_vec_unpack (operands, true/*unsigned_p*/, false/*high_p*/);
+ DONE;
+})
+
+(define_expand "vec_extract<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:IMSA 1 "register_operand")
+ (match_operand 2 "const_<indeximm>_operand")]
+ "ISA_HAS_MSA"
+{
+ if (<UNITMODE>mode == QImode || <UNITMODE>mode == HImode)
+ {
+ rtx dest1 = gen_reg_rtx (SImode);
+ emit_insn (gen_msa_copy_s_<msafmt> (dest1, operands[1], operands[2]));
+ emit_move_insn (operands[0],
+ gen_lowpart (<UNITMODE>mode, dest1));
+ }
+ else
+ emit_insn (gen_msa_copy_s_<msafmt> (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
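+;; Floating-point vec_extract.  Element 0 is a plain register move (see
+;; msa_vec_extract_<msafmt_f> below); any other element is first slid
+;; into slot 0 with a byte-wise sldi.b on a V16QI view of the vector.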
+(define_expand "vec_extract<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:FMSA 1 "register_operand")
+ (match_operand 2 "const_<indeximm>_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx temp;
+ HOST_WIDE_INT val = INTVAL (operands[2]);
+
+ if (val == 0)
+ temp = operands[1];
+ else
+ {
+ /* We need to do the SLDI operation in V16QImode and adjust
+ operands[2] accordingly. */
+ rtx wd = gen_reg_rtx (V16QImode);
+ rtx ws = gen_reg_rtx (V16QImode);
+ emit_move_insn (ws, gen_rtx_SUBREG (V16QImode, operands[1], 0));
+ rtx n = GEN_INT (val * GET_MODE_SIZE (<UNITMODE>mode));
+ gcc_assert (INTVAL (n) < GET_MODE_NUNITS (V16QImode));
+ emit_insn (gen_msa_sldi_b (wd, ws, ws, n));
+ temp = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (temp, gen_rtx_SUBREG (<MODE>mode, wd, 0));
+ }
+ emit_insn (gen_msa_vec_extract_<msafmt_f> (operands[0], temp));
+ DONE;
+})
+
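+;; Element 0 of an MSA register overlays the corresponding scalar FPU
+;; register, so extracting it reduces to an fmove after reload.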
+(define_insn_and_split "msa_vec_extract_<msafmt_f>"
+ [(set (match_operand:<UNITMODE> 0 "register_operand" "=f")
+ (vec_select:<UNITMODE>
+ (match_operand:FMSA 1 "register_operand" "f")
+ (parallel [(const_int 0)])))]
+ "ISA_HAS_MSA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[1] = gen_rtx_REG (<UNITMODE>mode, REGNO (operands[1]));"
+ [(set_attr "move_type" "fmove")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_expand "vec_set<mode>"
+ [(match_operand:IMSA 0 "register_operand")
+ (match_operand:<UNITMODE> 1 "reg_or_0_operand")
+ (match_operand 2 "const_<indeximm>_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx index = GEN_INT (1 << INTVAL (operands[2]));
+ emit_insn (gen_msa_insert_<msafmt> (operands[0], operands[1],
+ operands[0], index));
+ DONE;
+})
+
+(define_expand "vec_set<mode>"
+ [(match_operand:FMSA 0 "register_operand")
+ (match_operand:<UNITMODE> 1 "register_operand")
+ (match_operand 2 "const_<indeximm>_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx index = GEN_INT (1 << INTVAL (operands[2]));
+ emit_insn (gen_msa_insve_<msafmt_f>_scalar (operands[0], operands[1],
+ operands[0], index));
+ DONE;
+})
+
+(define_expand "vcondu<MSA:mode><IMSA:mode>"
+ [(match_operand:MSA 0 "register_operand")
+ (match_operand:MSA 1 "reg_or_m1_operand")
+ (match_operand:MSA 2 "reg_or_0_operand")
+ (match_operator 3 ""
+ [(match_operand:IMSA 4 "register_operand")
+ (match_operand:IMSA 5 "register_operand")])]
+ "ISA_HAS_MSA
+ && (GET_MODE_NUNITS (<MSA:MODE>mode) == GET_MODE_NUNITS (<IMSA:MODE>mode))"
+{
+ mips_expand_vec_cond_expr (<MSA:MODE>mode, <MSA:VIMODE>mode, operands);
+ DONE;
+})
+
+(define_expand "vcond<MSA:mode><MSA_2:mode>"
+ [(match_operand:MSA 0 "register_operand")
+ (match_operand:MSA 1 "reg_or_m1_operand")
+ (match_operand:MSA 2 "reg_or_0_operand")
+ (match_operator 3 ""
+ [(match_operand:MSA_2 4 "register_operand")
+ (match_operand:MSA_2 5 "register_operand")])]
+ "ISA_HAS_MSA
+ && (GET_MODE_NUNITS (<MSA:MODE>mode) == GET_MODE_NUNITS (<MSA_2:MODE>mode))"
+{
+ mips_expand_vec_cond_expr (<MSA:MODE>mode, <MSA:VIMODE>mode, operands);
+ DONE;
+})
+
+(define_insn "msa_insert_<msafmt_f>"
+ [(set (match_operand:MSA 0 "register_operand" "=f")
+ (vec_merge:MSA
+ (vec_duplicate:MSA
+ (match_operand:<UNITMODE> 1 "reg_or_0_operand" "dJ"))
+ (match_operand:MSA 2 "register_operand" "0")
+ (match_operand 3 "const_<bitmask>_operand" "")))]
+ "ISA_HAS_MSA"
+{
+ if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode))
+ return "#";
+ else
+ return "insert.<msafmt>\t%w0[%y3],%z1";
+}
+ [(set_attr "type" "simd_insert")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:MSA_D 0 "register_operand")
+ (vec_merge:MSA_D
+ (vec_duplicate:MSA_D
+ (match_operand:<UNITMODE> 1 "<MSA_D:msa_d>_operand"))
+ (match_operand:MSA_D 2 "register_operand")
+ (match_operand 3 "const_<bitmask>_operand")))]
+ "reload_completed && ISA_HAS_MSA && !TARGET_64BIT"
+ [(const_int 0)]
+{
+ mips_split_msa_insert_d (operands[0], operands[2], operands[3], operands[1]);
+ DONE;
+})
+
+(define_insn "msa_insve_<msafmt_f>"
+ [(set (match_operand:MSA 0 "register_operand" "=f")
+ (vec_merge:MSA
+ (vec_duplicate:MSA
+ (vec_select:<UNITMODE>
+ (match_operand:MSA 1 "register_operand" "f")
+ (parallel [(const_int 0)])))
+ (match_operand:MSA 2 "register_operand" "0")
+ (match_operand 3 "const_<bitmask>_operand" "")))]
+ "ISA_HAS_MSA"
+ "insve.<msafmt>\t%w0[%y3],%w1[0]"
+ [(set_attr "type" "simd_insert")
+ (set_attr "mode" "<MODE>")])
+
+;; As msa_insve_<msafmt_f>, but operand 1 is a scalar rather than the
+;; first element of a vector.
+(define_insn "msa_insve_<msafmt_f>_scalar"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (vec_merge:FMSA
+ (vec_duplicate:FMSA
+ (match_operand:<UNITMODE> 1 "register_operand" "f"))
+ (match_operand:FMSA 2 "register_operand" "0")
+ (match_operand 3 "const_<bitmask>_operand" "")))]
+ "ISA_HAS_MSA"
+ "insve.<msafmt>\t%w0[%y3],%w1[0]"
+ [(set_attr "type" "simd_insert")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_copy_<su>_<msafmt>"
+ [(set (match_operand:<VRES> 0 "register_operand" "=d")
+ (any_extend:<VRES>
+ (vec_select:<UNITMODE>
+ (match_operand:IMSA_HB 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_<indeximm>_operand" "")]))))]
+ "ISA_HAS_MSA"
+ "copy_<su>.<msafmt>\t%0,%w1[%2]"
+ [(set_attr "type" "simd_copy")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_copy_u_w"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")]))))]
+ "ISA_HAS_MSA && TARGET_64BIT"
+ "copy_u.w\t%0,%w1[%2]"
+ [(set_attr "type" "simd_copy")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_copy_s_<msafmt_f>_64bit"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (sign_extend:DI
+ (vec_select:<UNITMODE>
+ (match_operand:MSA_W 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_<indeximm>_operand" "")]))))]
+ "ISA_HAS_MSA && TARGET_64BIT"
+ "copy_s.<msafmt>\t%0,%w1[%2]"
+ [(set_attr "type" "simd_copy")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_copy_s_<msafmt_f>"
+ [(set (match_operand:<UNITMODE> 0 "register_operand" "=d")
+ (vec_select:<UNITMODE>
+ (match_operand:MSA_W 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_<indeximm>_operand" "")])))]
+ "ISA_HAS_MSA"
+ "copy_s.<msafmt>\t%0,%w1[%2]"
+ [(set_attr "type" "simd_copy")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "msa_copy_s_<msafmt_f>"
+ [(set (match_operand:<UNITMODE> 0 "register_operand" "=d")
+ (vec_select:<UNITMODE>
+ (match_operand:MSA_D 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_<indeximm>_operand" "")])))]
+ "ISA_HAS_MSA"
+{
+ if (TARGET_64BIT)
+ return "copy_s.<msafmt>\t%0,%w1[%2]";
+ else
+ return "#";
+}
+ "reload_completed && ISA_HAS_MSA && !TARGET_64BIT"
+ [(const_int 0)]
+{
+ mips_split_msa_copy_d (operands[0], operands[1], operands[2],
+ gen_msa_copy_s_w);
+ DONE;
+}
+ [(set_attr "type" "simd_copy")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "vec_perm_const<mode>"
+ [(match_operand:MSA 0 "register_operand")
+ (match_operand:MSA 1 "register_operand")
+ (match_operand:MSA 2 "register_operand")
+ (match_operand:<VIMODE> 3 "")]
+ "ISA_HAS_MSA"
+{
+ if (mips_expand_vec_perm_const (operands))
+ DONE;
+ else
+ FAIL;
+})
+
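+;; MSA has no vector abs instruction as such, but add_a.<fmt> adds the
+;; absolute values of its operands, so |x| is computed as add_a (x, 0).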
+(define_expand "abs<mode>2"
+ [(match_operand:IMSA 0 "register_operand" "=f")
+ (abs:IMSA (match_operand:IMSA 1 "register_operand" "f"))]
+ "ISA_HAS_MSA"
+{
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (reg, CONST0_RTX (<MODE>mode));
+ emit_insn (gen_msa_add_a_<msafmt> (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "neg<mode>2"
+ [(set (match_operand:MSA 0 "register_operand")
+ (minus:MSA (match_dup 2)
+ (match_operand:MSA 1 "register_operand")))]
+ "ISA_HAS_MSA"
+{
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (reg, CONST0_RTX (<MODE>mode));
+ operands[2] = reg;
+})
+
+(define_expand "msa_ldi<mode>"
+ [(match_operand:IMSA 0 "register_operand")
+ (match_operand 1 "const_imm10_operand")]
+ "ISA_HAS_MSA"
+{
+ if (<MODE>mode == V16QImode)
+ operands[1] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]),
+ <UNITMODE>mode));
+ emit_move_insn (operands[0],
+ mips_gen_const_int_vector (<MODE>mode, INTVAL (operands[1])));
+ DONE;
+})
+
+(define_insn "vec_perm<mode>"
+ [(set (match_operand:MSA 0 "register_operand" "=f")
+ (unspec:MSA [(match_operand:MSA 1 "register_operand" "f")
+ (match_operand:MSA 2 "register_operand" "f")
+ (match_operand:<VIMODE> 3 "register_operand" "0")]
+ UNSPEC_MSA_VSHF))]
+ "ISA_HAS_MSA"
+ "vshf.<msafmt>\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_sld")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "mov<mode>"
+ [(set (match_operand:MSA 0)
+ (match_operand:MSA 1))]
+ "ISA_HAS_MSA"
+{
+ if (mips_legitimize_move (<MODE>mode, operands[0], operands[1]))
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:MSA 0)
+ (match_operand:MSA 1))]
+ "ISA_HAS_MSA"
+{
+ if (mips_legitimize_move (<MODE>mode, operands[0], operands[1]))
+ DONE;
+})
+
+;; 128-bit MSA modes can only exist in MSA registers or memory.  The
+;; exception is that MSA modes are allowed in GP registers for argument
+;; passing and return values.
+(define_insn "mov<mode>_msa"
+ [(set (match_operand:MSA 0 "nonimmediate_operand" "=f,f,R,*d,*f")
+ (match_operand:MSA 1 "move_operand" "fYGYI,R,f,*f,*d"))]
+ "ISA_HAS_MSA"
+ { return mips_output_move (operands[0], operands[1]); }
+ [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:MSA 0 "nonimmediate_operand")
+ (match_operand:MSA 1 "move_operand"))]
+ "reload_completed && ISA_HAS_MSA
+ && mips_split_move_insn_p (operands[0], operands[1], insn)"
+ [(const_int 0)]
+{
+ mips_split_move_insn (operands[0], operands[1], curr_insn);
+ DONE;
+})
+
+;; Offset load
+(define_expand "msa_ld_<msafmt_f>"
+ [(match_operand:MSA 0 "register_operand")
+ (match_operand 1 "pmode_register_operand")
+ (match_operand 2 "aq10<msafmt>_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
+ INTVAL (operands[2]));
+ mips_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr));
+ DONE;
+})
+
+;; Offset store
+(define_expand "msa_st_<msafmt_f>"
+ [(match_operand:MSA 0 "register_operand")
+ (match_operand 1 "pmode_register_operand")
+ (match_operand 2 "aq10<msafmt>_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
+ INTVAL (operands[2]));
+ mips_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]);
+ DONE;
+})
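+
+;; A usage sketch for the two expanders above, via the msa.h
+;; intrinsics (the offset must satisfy aq10<msafmt>_operand, i.e. be a
+;; signed 10-bit multiple of the element size):
+;;
+;;   v4i32 load32 (void *p) { return __builtin_msa_ld_w (p, 32); }
+;;
+;; The constant offset is folded into the address, so the resulting
+;; MEM is matched by mov<mode>_msa above and printed as an ordinary
+;; "ld.w $w0,32($4)".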
+
+;; Integer operations
+(define_insn "add<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f,f")
+ (plus:IMSA
+ (match_operand:IMSA 1 "register_operand" "f,f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_ximm5_operand" "f,Unv5,Uuv5")))]
+ "ISA_HAS_MSA"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "addv.<msafmt>\t%w0,%w1,%w2";
+ case 1:
+ {
+ HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (operands[2], 0));
+
+ operands[2] = GEN_INT (-val);
+ return "subvi.<msafmt>\t%w0,%w1,%d2";
+ }
+ case 2:
+ return "addvi.<msafmt>\t%w0,%w1,%E2";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "alu_type" "simd_add")
+ (set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
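+
+;; A sketch of the negative-immediate alternative, reusing the v4i32
+;; typedef from the neg<mode>2 comment above:
+;;
+;;   v4i32 f (v4i32 a) { return a + (v4i32) { -3, -3, -3, -3 }; }
+;;
+;; matches alternative 1 (Unv5) and is rewritten as
+;; "subvi.w $w0,$w1,3", while an addend of 3 would match alternative 2
+;; and use "addvi.w".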
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (minus:IMSA
+ (match_operand:IMSA 1 "register_operand" "f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_uimm5_operand" "f,Uuv5")))]
+ "ISA_HAS_MSA"
+ "@
+ subv.<msafmt>\t%w0,%w1,%w2
+ subvi.<msafmt>\t%w0,%w1,%E2"
+ [(set_attr "alu_type" "simd_add")
+ (set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mul<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (mult:IMSA (match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "mulv.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_maddv_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (plus:IMSA (mult:IMSA (match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f"))
+ (match_operand:IMSA 3 "register_operand" "0")))]
+ "ISA_HAS_MSA"
+ "maddv.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_msubv_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (minus:IMSA (match_operand:IMSA 1 "register_operand" "0")
+ (mult:IMSA (match_operand:IMSA 2 "register_operand" "f")
+ (match_operand:IMSA 3 "register_operand" "f"))))]
+ "ISA_HAS_MSA"
+ "msubv.<msafmt>\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "div<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (div:IMSA (match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ { return mips_msa_output_division ("div_s.<msafmt>\t%w0,%w1,%w2", operands); }
+ [(set_attr "type" "simd_div")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "udiv<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (udiv:IMSA (match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ { return mips_msa_output_division ("div_u.<msafmt>\t%w0,%w1,%w2", operands); }
+ [(set_attr "type" "simd_div")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mod<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (mod:IMSA (match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ { return mips_msa_output_division ("mod_s.<msafmt>\t%w0,%w1,%w2", operands); }
+ [(set_attr "type" "simd_div")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "umod<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (umod:IMSA (match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ { return mips_msa_output_division ("mod_u.<msafmt>\t%w0,%w1,%w2", operands); }
+ [(set_attr "type" "simd_div")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f,f")
+ (xor:IMSA
+ (match_operand:IMSA 1 "register_operand" "f,f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
+ "ISA_HAS_MSA"
+ "@
+ xor.v\t%w0,%w1,%w2
+ bnegi.%v0\t%w0,%w1,%V2
+ xori.b\t%w0,%w1,%B2"
+ [(set_attr "type" "simd_logic,simd_bit,simd_logic")
+ (set_attr "mode" "<MODE>")])
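+
+;; The YC alternative turns an XOR with a replicated power of two into
+;; a single bit flip; a sketch:
+;;
+;;   v4i32 flip (v4i32 a) { return a ^ (v4i32) { 8, 8, 8, 8 }; }
+;;
+;; becomes "bnegi.w $w0,$w1,3" (%V2 prints the log2 of the element).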
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f,f")
+ (ior:IMSA
+ (match_operand:IMSA 1 "register_operand" "f,f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
+ "ISA_HAS_MSA"
+ "@
+ or.v\t%w0,%w1,%w2
+ bseti.%v0\t%w0,%w1,%V2
+ ori.b\t%w0,%w1,%B2"
+ [(set_attr "type" "simd_logic,simd_bit,simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "and<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f,f")
+ (and:IMSA
+ (match_operand:IMSA 1 "register_operand" "f,f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_val_operand" "f,YZ,Urv8")))]
+ "ISA_HAS_MSA"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "and.v\t%w0,%w1,%w2";
+ case 1:
+ {
+ rtx elt0 = CONST_VECTOR_ELT (operands[2], 0);
+ unsigned HOST_WIDE_INT val = ~UINTVAL (elt0);
+ operands[2] = mips_gen_const_int_vector (<MODE>mode, val & (-val));
+ return "bclri.%v0\t%w0,%w1,%V2";
+ }
+ case 2:
+ return "andi.b\t%w0,%w1,%B2";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "simd_logic,simd_bit,simd_logic")
+ (set_attr "mode" "<MODE>")])
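+
+;; Alternative 1 recognizes an AND whose mask clears a single bit: the
+;; code above complements one element, isolates its lowest set bit and
+;; lets %V2 print the bit index.  A sketch:
+;;
+;;   v4i32 clr (v4i32 a) { return a & (v4i32) { ~4, ~4, ~4, ~4 }; }
+;;
+;; is emitted as "bclri.w $w0,$w1,2".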
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (not:IMSA (match_operand:IMSA 1 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "nor.v\t%w0,%w1,%w1"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "TI")])
+
+(define_insn "vlshr<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (lshiftrt:IMSA
+ (match_operand:IMSA 1 "register_operand" "f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ "ISA_HAS_MSA"
+ "@
+ srl.<msafmt>\t%w0,%w1,%w2
+ srli.<msafmt>\t%w0,%w1,%E2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "vashr<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (ashiftrt:IMSA
+ (match_operand:IMSA 1 "register_operand" "f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ "ISA_HAS_MSA"
+ "@
+ sra.<msafmt>\t%w0,%w1,%w2
+ srai.<msafmt>\t%w0,%w1,%E2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "vashl<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (ashift:IMSA
+ (match_operand:IMSA 1 "register_operand" "f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ "ISA_HAS_MSA"
+ "@
+ sll.<msafmt>\t%w0,%w1,%w2
+ slli.<msafmt>\t%w0,%w1,%E2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+;; Floating-point operations
+(define_insn "add<mode>3"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (plus:FMSA (match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "fadd.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (minus:FMSA (match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "fsub.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "mul<mode>3"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (mult:FMSA (match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "fmul.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fmul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "div<mode>3"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (div:FMSA (match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "fdiv.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fma<mode>4"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (fma:FMSA (match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")
+ (match_operand:FMSA 3 "register_operand" "0")))]
+ "ISA_HAS_MSA"
+ "fmadd.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fnma<mode>4"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (fma:FMSA (neg:FMSA (match_operand:FMSA 1 "register_operand" "f"))
+ (match_operand:FMSA 2 "register_operand" "f")
+ (match_operand:FMSA 3 "register_operand" "0")))]
+ "ISA_HAS_MSA"
+ "fmsub.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fmadd")
+ (set_attr "mode" "<MODE>")])
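+
+;; Note that fnma<mode>4 computes operand 3 - operand 1 * operand 2,
+;; which is exactly the fused multiply-subtract that fmsub.<msafmt>
+;; performs on its tied accumulator.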
+
+(define_insn "sqrt<mode>2"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (sqrt:FMSA (match_operand:FMSA 1 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "fsqrt.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
+;; Built-in functions
+(define_insn "msa_add_a_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (plus:IMSA (abs:IMSA (match_operand:IMSA 1 "register_operand" "f"))
+ (abs:IMSA (match_operand:IMSA 2 "register_operand" "f"))))]
+ "ISA_HAS_MSA"
+ "add_a.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_adds_a_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (ss_plus:IMSA
+ (abs:IMSA (match_operand:IMSA 1 "register_operand" "f"))
+ (abs:IMSA (match_operand:IMSA 2 "register_operand" "f"))))]
+ "ISA_HAS_MSA"
+ "adds_a.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "ssadd<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (ss_plus:IMSA (match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "adds_s.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "usadd<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (us_plus:IMSA (match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "adds_u.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_asub_s_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_ASUB_S))]
+ "ISA_HAS_MSA"
+ "asub_s.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_asub_u_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_ASUB_U))]
+ "ISA_HAS_MSA"
+ "asub_u.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_ave_s_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_AVE_S))]
+ "ISA_HAS_MSA"
+ "ave_s.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_ave_u_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_AVE_U))]
+ "ISA_HAS_MSA"
+ "ave_u.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_aver_s_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_AVER_S))]
+ "ISA_HAS_MSA"
+ "aver_s.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_aver_u_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_AVER_U))]
+ "ISA_HAS_MSA"
+ "aver_u.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_bclr_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_BCLR))]
+ "ISA_HAS_MSA"
+ "bclr.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_bclri_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm>_operand" "")]
+ UNSPEC_MSA_BCLRI))]
+ "ISA_HAS_MSA"
+ "bclri.<msafmt>\t%w0,%w1,%2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_binsl_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "0")
+ (match_operand:IMSA 2 "register_operand" "f")
+ (match_operand:IMSA 3 "register_operand" "f")]
+ UNSPEC_MSA_BINSL))]
+ "ISA_HAS_MSA"
+ "binsl.<msafmt>\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_bitins")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_binsli_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "0")
+ (match_operand:IMSA 2 "register_operand" "f")
+ (match_operand 3 "const_<bitimm>_operand" "")]
+ UNSPEC_MSA_BINSLI))]
+ "ISA_HAS_MSA"
+ "binsli.<msafmt>\t%w0,%w2,%3"
+ [(set_attr "type" "simd_bitins")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_binsr_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "0")
+ (match_operand:IMSA 2 "register_operand" "f")
+ (match_operand:IMSA 3 "register_operand" "f")]
+ UNSPEC_MSA_BINSR))]
+ "ISA_HAS_MSA"
+ "binsr.<msafmt>\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_bitins")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_binsri_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "0")
+ (match_operand:IMSA 2 "register_operand" "f")
+ (match_operand 3 "const_<bitimm>_operand" "")]
+ UNSPEC_MSA_BINSRI))]
+ "ISA_HAS_MSA"
+ "binsri.<msafmt>\t%w0,%w2,%3"
+ [(set_attr "type" "simd_bitins")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_bmnz_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (ior:IMSA (and:IMSA (match_operand:IMSA 2 "register_operand" "f,f")
+ (match_operand:IMSA 3 "reg_or_vector_same_val_operand" "f,Urv8"))
+ (and:IMSA (not:IMSA (match_dup 3))
+ (match_operand:IMSA 1 "register_operand" "0,0"))))]
+ "ISA_HAS_MSA"
+ "@
+ bmnz.v\t%w0,%w2,%w3
+ bmnzi.b\t%w0,%w2,%B3"
+ [(set_attr "type" "simd_bitmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_bmz_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (ior:IMSA (and:IMSA (not:IMSA
+ (match_operand:IMSA 3 "reg_or_vector_same_val_operand" "f,Urv8"))
+ (match_operand:IMSA 2 "register_operand" "f,f"))
+ (and:IMSA (match_operand:IMSA 1 "register_operand" "0,0")
+ (match_dup 3))))]
+ "ISA_HAS_MSA"
+ "@
+ bmz.v\t%w0,%w2,%w3
+ bmzi.b\t%w0,%w2,%B3"
+ [(set_attr "type" "simd_bitmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_bneg_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_BNEG))]
+ "ISA_HAS_MSA"
+ "bneg.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_bnegi_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+		      (match_operand 2 "const_<bitimm>_operand" "")]
+ UNSPEC_MSA_BNEGI))]
+ "ISA_HAS_MSA"
+ "bnegi.<msafmt>\t%w0,%w1,%2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_bsel_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (ior:IMSA (and:IMSA (not:IMSA
+ (match_operand:IMSA 1 "register_operand" "0,0"))
+ (match_operand:IMSA 2 "register_operand" "f,f"))
+ (and:IMSA (match_dup 1)
+ (match_operand:IMSA 3 "reg_or_vector_same_val_operand" "f,Urv8"))))]
+ "ISA_HAS_MSA"
+ "@
+ bsel.v\t%w0,%w2,%w3
+ bseli.b\t%w0,%w2,%B3"
+ [(set_attr "type" "simd_bitmov")
+ (set_attr "mode" "<MODE>")])
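+
+;; bsel is a bitwise select: each result bit is taken from operand 2
+;; where the corresponding bit of operand 1 (the tied destination) is
+;; clear, and from operand 3 where it is set.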
+
+(define_insn "msa_bset_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_BSET))]
+ "ISA_HAS_MSA"
+ "bset.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_bseti_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm>_operand" "")]
+ UNSPEC_MSA_BSETI))]
+ "ISA_HAS_MSA"
+ "bseti.<msafmt>\t%w0,%w1,%2"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_code_iterator ICC [eq le leu lt ltu])
+
+(define_code_attr icc
+ [(eq "eq")
+ (le "le_s")
+ (leu "le_u")
+ (lt "lt_s")
+ (ltu "lt_u")])
+
+(define_code_attr icci
+ [(eq "eqi")
+ (le "lei_s")
+ (leu "lei_u")
+ (lt "lti_s")
+ (ltu "lti_u")])
+
+(define_code_attr cmpi
+ [(eq "s")
+ (le "s")
+ (leu "u")
+ (lt "s")
+ (ltu "u")])
+
+(define_insn "msa_c<ICC:icc>_<IMSA:msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (ICC:IMSA
+ (match_operand:IMSA 1 "register_operand" "f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_<ICC:cmpi>imm5_operand" "f,U<ICC:cmpi>v5")))]
+ "ISA_HAS_MSA"
+ "@
+ c<ICC:icc>.<IMSA:msafmt>\t%w0,%w1,%w2
+ c<ICC:icci>.<IMSA:msafmt>\t%w0,%w1,%E2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_dotp_<su>_d"
+ [(set (match_operand:V2DI 0 "register_operand" "=f")
+ (plus:V2DI
+ (mult:V2DI
+ (any_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)])))
+ (any_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)]))))
+ (mult:V2DI
+ (any_extend:V2DI
+ (vec_select:V4SI (match_dup 1)
+ (parallel [(const_int 1) (const_int 3)])))
+ (any_extend:V2DI
+ (vec_select:V4SI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)]))))))]
+ "ISA_HAS_MSA"
+ "dotp_<su>.d\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "V2DI")])
+
+(define_insn "msa_dotp_<su>_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (plus:V4SI
+ (mult:V4SI
+ (any_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (any_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)]))))
+ (mult:V4SI
+ (any_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))
+ (any_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)]))))))]
+ "ISA_HAS_MSA"
+ "dotp_<su>.w\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_dotp_<su>_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (plus:V8HI
+ (mult:V8HI
+ (any_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (any_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)]))))
+ (mult:V8HI
+ (any_extend:V8HI
+ (vec_select:V8QI (match_dup 1)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))
+ (any_extend:V8HI
+ (vec_select:V8QI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)]))))))]
+ "ISA_HAS_MSA"
+ "dotp_<su>.h\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "V8HI")])
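+
+;; In scalar terms the three dotp patterns above compute a widening
+;; dot product of adjacent element pairs; e.g. for dotp_s.w on V8HI
+;; inputs a and b:
+;;
+;;   dst[i] = (int) a[2 * i] * (int) b[2 * i]
+;;            + (int) a[2 * i + 1] * (int) b[2 * i + 1];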
+
+(define_insn "msa_dpadd_<su>_d"
+ [(set (match_operand:V2DI 0 "register_operand" "=f")
+ (plus:V2DI
+ (plus:V2DI
+ (mult:V2DI
+ (any_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)])))
+ (any_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)]))))
+ (mult:V2DI
+ (any_extend:V2DI
+ (vec_select:V4SI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)])))
+ (any_extend:V2DI
+ (vec_select:V4SI (match_dup 3)
+ (parallel [(const_int 1) (const_int 3)])))))
+ (match_operand:V2DI 1 "register_operand" "0")))]
+ "ISA_HAS_MSA"
+ "dpadd_<su>.d\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "V2DI")])
+
+(define_insn "msa_dpadd_<su>_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (plus:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (any_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (any_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)]))))
+ (mult:V4SI
+ (any_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))
+ (any_extend:V4SI
+ (vec_select:V4HI (match_dup 3)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))))
+ (match_operand:V4SI 1 "register_operand" "0")))]
+ "ISA_HAS_MSA"
+ "dpadd_<su>.w\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_dpadd_<su>_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (plus:V8HI
+ (plus:V8HI
+ (mult:V8HI
+ (any_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (any_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)]))))
+ (mult:V8HI
+ (any_extend:V8HI
+ (vec_select:V8QI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))
+ (any_extend:V8HI
+ (vec_select:V8QI (match_dup 3)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))))
+ (match_operand:V8HI 1 "register_operand" "0")))]
+ "ISA_HAS_MSA"
+ "dpadd_<su>.h\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "V8HI")])
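+
+;; dpadd is the accumulating form of dotp: in the notation above,
+;; dst[i] = acc[i] + a[2 * i] * b[2 * i] + a[2 * i + 1] * b[2 * i + 1],
+;; where the accumulator is tied to the output.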
+
+(define_insn "msa_dpsub_<su>_d"
+ [(set (match_operand:V2DI 0 "register_operand" "=f")
+ (minus:V2DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (plus:V2DI
+ (mult:V2DI
+ (any_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)])))
+ (any_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)]))))
+ (mult:V2DI
+ (any_extend:V2DI
+ (vec_select:V4SI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)])))
+ (any_extend:V2DI
+ (vec_select:V4SI (match_dup 3)
+ (parallel [(const_int 1) (const_int 3)])))))))]
+ "ISA_HAS_MSA"
+ "dpsub_<su>.d\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "V2DI")])
+
+(define_insn "msa_dpsub_<su>_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (minus:V4SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (plus:V4SI
+ (mult:V4SI
+ (any_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))
+ (any_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)]))))
+ (mult:V4SI
+ (any_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))
+ (any_extend:V4SI
+ (vec_select:V4HI (match_dup 3)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))))))]
+ "ISA_HAS_MSA"
+ "dpsub_<su>.w\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_dpsub_<su>_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (minus:V8HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (plus:V8HI
+ (mult:V8HI
+ (any_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "register_operand" "%f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))
+ (any_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 3 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)]))))
+ (mult:V8HI
+ (any_extend:V8HI
+ (vec_select:V8QI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))
+ (any_extend:V8HI
+ (vec_select:V8QI (match_dup 3)
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))))))]
+ "ISA_HAS_MSA"
+ "dpsub_<su>.h\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "V8HI")])
+
+(define_insn "msa_fclass_<msafmt>"
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+ (unspec:<VIMODE> [(match_operand:FMSA 1 "register_operand" "f")]
+ UNSPEC_MSA_FCLASS))]
+ "ISA_HAS_MSA"
+ "fclass.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fclass")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_fcaf_<msafmt>"
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+ (unspec:<VIMODE> [(match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_FCAF))]
+ "ISA_HAS_MSA"
+ "fcaf.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fcmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_fcune_<FMSA:msafmt>"
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+ (unspec:<VIMODE> [(match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_FCUNE))]
+ "ISA_HAS_MSA"
+ "fcune.<FMSA:msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fcmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_code_iterator FCC [unordered ordered eq ne le lt uneq unle unlt])
+
+(define_code_attr fcc
+ [(unordered "fcun")
+ (ordered "fcor")
+ (eq "fceq")
+ (ne "fcne")
+ (uneq "fcueq")
+ (unle "fcule")
+ (unlt "fcult")
+ (le "fcle")
+ (lt "fclt")])
+
+(define_int_iterator FSC_UNS [UNSPEC_MSA_FSAF UNSPEC_MSA_FSUN UNSPEC_MSA_FSOR
+ UNSPEC_MSA_FSEQ UNSPEC_MSA_FSNE UNSPEC_MSA_FSUEQ
+ UNSPEC_MSA_FSUNE UNSPEC_MSA_FSULE UNSPEC_MSA_FSULT
+ UNSPEC_MSA_FSLE UNSPEC_MSA_FSLT])
+
+(define_int_attr fsc
+ [(UNSPEC_MSA_FSAF "fsaf")
+ (UNSPEC_MSA_FSUN "fsun")
+ (UNSPEC_MSA_FSOR "fsor")
+ (UNSPEC_MSA_FSEQ "fseq")
+ (UNSPEC_MSA_FSNE "fsne")
+ (UNSPEC_MSA_FSUEQ "fsueq")
+ (UNSPEC_MSA_FSUNE "fsune")
+ (UNSPEC_MSA_FSULE "fsule")
+ (UNSPEC_MSA_FSULT "fsult")
+ (UNSPEC_MSA_FSLE "fsle")
+ (UNSPEC_MSA_FSLT "fslt")])
+
+(define_insn "msa_<FCC:fcc>_<FMSA:msafmt>"
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+ (FCC:<VIMODE> (match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "<FCC:fcc>.<FMSA:msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fcmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_<fsc>_<FMSA:msafmt>"
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+ (unspec:<VIMODE> [(match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")]
+ FSC_UNS))]
+ "ISA_HAS_MSA"
+ "<fsc>.<FMSA:msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fcmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_fexp2_<msafmt>"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (unspec:FMSA [(match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:<VIMODE> 2 "register_operand" "f")]
+ UNSPEC_MSA_FEXP2))]
+ "ISA_HAS_MSA"
+ "fexp2.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fexp2")
+ (set_attr "mode" "<MODE>")])
+
+(define_mode_attr fint
+ [(V4SF "v4si")
+ (V2DF "v2di")])
+
+(define_mode_attr FQ
+ [(V4SF "V8HI")
+ (V2DF "V4SI")])
+
+(define_mode_attr FINTCNV
+ [(V4SF "I2S")
+ (V2DF "I2D")])
+
+(define_mode_attr FINTCNV_2
+ [(V4SF "S2I")
+ (V2DF "D2I")])
+
+(define_insn "float<fint><FMSA:mode>2"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (float:FMSA (match_operand:<VIMODE> 1 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "ffint_s.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "floatuns<fint><FMSA:mode>2"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (unsigned_float:FMSA
+ (match_operand:<VIMODE> 1 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "ffint_u.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV>")
+ (set_attr "mode" "<MODE>")])
+
+(define_mode_attr FFQ
+ [(V4SF "V8HI")
+ (V2DF "V4SI")])
+
+(define_insn "msa_ffql_<msafmt>"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (unspec:FMSA [(match_operand:<FQ> 1 "register_operand" "f")]
+ UNSPEC_MSA_FFQL))]
+ "ISA_HAS_MSA"
+ "ffql.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_ffqr_<msafmt>"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (unspec:FMSA [(match_operand:<FQ> 1 "register_operand" "f")]
+ UNSPEC_MSA_FFQR))]
+ "ISA_HAS_MSA"
+ "ffqr.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_fill_<msafmt_f>"
+ [(set (match_operand:MSA 0 "register_operand" "=f,f")
+ (vec_duplicate:MSA
+ (match_operand:<UNITMODE> 1 "reg_or_0_operand" "d,J")))]
+ "ISA_HAS_MSA"
+{
+ if (which_alternative == 1)
+ return "ldi.<msafmt>\t%w0,0";
+
+ if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode))
+ return "#";
+ else
+ return "fill.<msafmt>\t%w0,%z1";
+}
+ [(set_attr "type" "simd_fill")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:MSA_D 0 "register_operand")
+ (vec_duplicate:MSA_D
+ (match_operand:<UNITMODE> 1 "register_operand")))]
+ "reload_completed && ISA_HAS_MSA && !TARGET_64BIT"
+ [(const_int 0)]
+{
+ mips_split_msa_fill_d (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "msa_flog2_<msafmt>"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (unspec:FMSA [(match_operand:FMSA 1 "register_operand" "f")]
+ UNSPEC_MSA_FLOG2))]
+ "ISA_HAS_MSA"
+ "flog2.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_flog2")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "smax<mode>3"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (smax:FMSA (match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "fmax.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fminmax")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_fmax_a_<msafmt>"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (if_then_else
+ (gt (abs:FMSA (match_operand:FMSA 1 "register_operand" "f"))
+ (abs:FMSA (match_operand:FMSA 2 "register_operand" "f")))
+ (match_dup 1)
+ (match_dup 2)))]
+ "ISA_HAS_MSA"
+ "fmax_a.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fminmax")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "smin<mode>3"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (smin:FMSA (match_operand:FMSA 1 "register_operand" "f")
+ (match_operand:FMSA 2 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "fmin.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fminmax")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_fmin_a_<msafmt>"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (if_then_else
+ (lt (abs:FMSA (match_operand:FMSA 1 "register_operand" "f"))
+ (abs:FMSA (match_operand:FMSA 2 "register_operand" "f")))
+ (match_dup 1)
+ (match_dup 2)))]
+ "ISA_HAS_MSA"
+ "fmin_a.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fminmax")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_frcp_<msafmt>"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (unspec:FMSA [(match_operand:FMSA 1 "register_operand" "f")]
+ UNSPEC_MSA_FRCP))]
+ "ISA_HAS_MSA"
+ "frcp.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_frint_<msafmt>"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (unspec:FMSA [(match_operand:FMSA 1 "register_operand" "f")]
+ UNSPEC_MSA_FRINT))]
+ "ISA_HAS_MSA"
+ "frint.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_frsqrt_<msafmt>"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (unspec:FMSA [(match_operand:FMSA 1 "register_operand" "f")]
+ UNSPEC_MSA_FRSQRT))]
+ "ISA_HAS_MSA"
+ "frsqrt.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_ftint_s_<msafmt>"
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+ (unspec:<VIMODE> [(match_operand:FMSA 1 "register_operand" "f")]
+ UNSPEC_MSA_FTINT_S))]
+ "ISA_HAS_MSA"
+ "ftint_s.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV_2>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_ftint_u_<msafmt>"
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+ (unspec:<VIMODE> [(match_operand:FMSA 1 "register_operand" "f")]
+ UNSPEC_MSA_FTINT_U))]
+ "ISA_HAS_MSA"
+ "ftint_u.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV_2>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<FMSA:mode><mode_i>2"
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+ (fix:<VIMODE> (match_operand:FMSA 1 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "ftrunc_s.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV_2>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fixuns_trunc<FMSA:mode><mode_i>2"
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
+ (unsigned_fix:<VIMODE> (match_operand:FMSA 1 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "ftrunc_u.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "<FINTCNV_2>")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_ftq_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "f")
+ (match_operand:V4SF 2 "register_operand" "f")]
+ UNSPEC_MSA_FTQ))]
+ "ISA_HAS_MSA"
+ "ftq.h\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "S2I")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "msa_ftq_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "f")
+ (match_operand:V2DF 2 "register_operand" "f")]
+ UNSPEC_MSA_FTQ))]
+ "ISA_HAS_MSA"
+ "ftq.w\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "cnv_mode" "D2I")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "msa_h<optab>_<su>_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (addsub:V8HI
+ (any_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))
+ (any_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))))]
+ "ISA_HAS_MSA"
+ "h<optab>_<su>.h\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V8HI")])
+
+(define_insn "msa_h<optab>_<su>_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (addsub:V4SI
+ (any_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))
+ (any_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))))]
+ "ISA_HAS_MSA"
+ "h<optab>_<su>.w\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_h<optab>_<su>_d"
+ [(set (match_operand:V2DI 0 "register_operand" "=f")
+ (addsub:V2DI
+ (any_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "f")
+ (parallel [(const_int 1) (const_int 3)])))
+ (any_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "ISA_HAS_MSA"
+ "h<optab>_<su>.d\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "V2DI")])
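+
+;; The h<optab> patterns pair an odd element of operand 1 with the
+;; neighbouring even element of operand 2; e.g. for hadd_s.w on V8HI
+;; inputs a and b:
+;;
+;;   dst[i] = (int) a[2 * i + 1] + (int) b[2 * i];
+;;
+;; hsub subtracts instead of adding.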
+
+(define_insn "msa_ilvev_b"
+ [(set (match_operand:V16QI 0 "register_operand" "=f")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "f")
+ (match_operand:V16QI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 2) (const_int 18)
+ (const_int 4) (const_int 20)
+ (const_int 6) (const_int 22)
+ (const_int 8) (const_int 24)
+ (const_int 10) (const_int 26)
+ (const_int 12) (const_int 28)
+ (const_int 14) (const_int 30)])))]
+ "ISA_HAS_MSA"
+ "ilvev.b\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16QI")])
+
+(define_insn "msa_ilvev_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "f")
+ (match_operand:V8HI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 2) (const_int 10)
+ (const_int 4) (const_int 12)
+ (const_int 6) (const_int 14)])))]
+ "ISA_HAS_MSA"
+ "ilvev.h\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8HI")])
+
+(define_insn "msa_ilvev_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "f")
+ (match_operand:V4SI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "ISA_HAS_MSA"
+ "ilvev.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_ilvev_w_f"
+ [(set (match_operand:V4SF 0 "register_operand" "=f")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "f")
+ (match_operand:V4SF 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "ISA_HAS_MSA"
+ "ilvev.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "msa_ilvl_b"
+ [(set (match_operand:V16QI 0 "register_operand" "=f")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "f")
+ (match_operand:V16QI 2 "register_operand" "f"))
+ (parallel [(const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "ISA_HAS_MSA"
+ "ilvl.b\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16QI")])
+
+(define_insn "msa_ilvl_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "f")
+ (match_operand:V8HI 2 "register_operand" "f"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "ISA_HAS_MSA"
+ "ilvl.h\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8HI")])
+
+(define_insn "msa_ilvl_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "f")
+ (match_operand:V4SI 2 "register_operand" "f"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "ISA_HAS_MSA"
+ "ilvl.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_ilvl_w_f"
+ [(set (match_operand:V4SF 0 "register_operand" "=f")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "f")
+ (match_operand:V4SF 2 "register_operand" "f"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "ISA_HAS_MSA"
+ "ilvl.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "msa_ilvl_d"
+ [(set (match_operand:V2DI 0 "register_operand" "=f")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "f")
+ (match_operand:V2DI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 3)])))]
+ "ISA_HAS_MSA"
+ "ilvl.d\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V2DI")])
+
+(define_insn "msa_ilvl_d_f"
+ [(set (match_operand:V2DF 0 "register_operand" "=f")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "f")
+ (match_operand:V2DF 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 3)])))]
+ "ISA_HAS_MSA"
+ "ilvl.d\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "msa_ilvod_b"
+ [(set (match_operand:V16QI 0 "register_operand" "=f")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "f")
+ (match_operand:V16QI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 17)
+ (const_int 3) (const_int 19)
+ (const_int 5) (const_int 21)
+ (const_int 7) (const_int 23)
+ (const_int 9) (const_int 25)
+ (const_int 11) (const_int 27)
+ (const_int 13) (const_int 29)
+ (const_int 15) (const_int 31)])))]
+ "ISA_HAS_MSA"
+ "ilvod.b\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16QI")])
+
+(define_insn "msa_ilvod_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "f")
+ (match_operand:V8HI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 9)
+ (const_int 3) (const_int 11)
+ (const_int 5) (const_int 13)
+ (const_int 7) (const_int 15)])))]
+ "ISA_HAS_MSA"
+ "ilvod.h\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8HI")])
+
+(define_insn "msa_ilvod_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "f")
+ (match_operand:V4SI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ "ISA_HAS_MSA"
+ "ilvod.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_ilvod_w_f"
+ [(set (match_operand:V4SF 0 "register_operand" "=f")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "f")
+ (match_operand:V4SF 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ "ISA_HAS_MSA"
+ "ilvod.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "msa_ilvr_b"
+ [(set (match_operand:V16QI 0 "register_operand" "=f")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "f")
+ (match_operand:V16QI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)])))]
+ "ISA_HAS_MSA"
+ "ilvr.b\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16QI")])
+
+(define_insn "msa_ilvr_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "f")
+ (match_operand:V8HI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "ISA_HAS_MSA"
+ "ilvr.h\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8HI")])
+
+(define_insn "msa_ilvr_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "f")
+ (match_operand:V4SI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "ISA_HAS_MSA"
+ "ilvr.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_ilvr_w_f"
+ [(set (match_operand:V4SF 0 "register_operand" "=f")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "f")
+ (match_operand:V4SF 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "ISA_HAS_MSA"
+ "ilvr.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "msa_ilvr_d"
+ [(set (match_operand:V2DI 0 "register_operand" "=f")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "f")
+ (match_operand:V2DI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 2)])))]
+ "ISA_HAS_MSA"
+ "ilvr.d\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V2DI")])
+
+(define_insn "msa_ilvr_d_f"
+ [(set (match_operand:V2DF 0 "register_operand" "=f")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "f")
+ (match_operand:V2DF 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 2)])))]
+ "ISA_HAS_MSA"
+ "ilvr.d\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V2DF")])
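+
+;; The ilv patterns above interleave their inputs: ilvev/ilvod merge
+;; the even/odd numbered elements of both sources, while ilvl/ilvr
+;; merge the high or low halves.  E.g. ilvr.w yields, in RTL operand
+;; order, dst = { a[0], b[0], a[1], b[1] }; the operands are printed
+;; swapped (%w2,%w1) so that the RTL order matches the architectural
+;; ws/wt order.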
+
+(define_insn "msa_madd_q_<msafmt>"
+ [(set (match_operand:IMSA_WH 0 "register_operand" "=f")
+ (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "0")
+ (match_operand:IMSA_WH 2 "register_operand" "f")
+ (match_operand:IMSA_WH 3 "register_operand" "f")]
+ UNSPEC_MSA_MADD_Q))]
+ "ISA_HAS_MSA"
+ "madd_q.<msafmt>\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_maddr_q_<msafmt>"
+ [(set (match_operand:IMSA_WH 0 "register_operand" "=f")
+ (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "0")
+ (match_operand:IMSA_WH 2 "register_operand" "f")
+ (match_operand:IMSA_WH 3 "register_operand" "f")]
+ UNSPEC_MSA_MADDR_Q))]
+ "ISA_HAS_MSA"
+ "maddr_q.<msafmt>\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_max_a_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (if_then_else
+ (gt (abs:IMSA (match_operand:IMSA 1 "register_operand" "f"))
+ (abs:IMSA (match_operand:IMSA 2 "register_operand" "f")))
+ (match_dup 1)
+ (match_dup 2)))]
+ "ISA_HAS_MSA"
+ "max_a.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "smax<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (smax:IMSA (match_operand:IMSA 1 "register_operand" "f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_simm5_operand" "f,Usv5")))]
+ "ISA_HAS_MSA"
+ "@
+ max_s.<msafmt>\t%w0,%w1,%w2
+ maxi_s.<msafmt>\t%w0,%w1,%B2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "umax<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (umax:IMSA (match_operand:IMSA 1 "register_operand" "f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_uimm5_operand" "f,Uuv5")))]
+ "ISA_HAS_MSA"
+ "@
+ max_u.<msafmt>\t%w0,%w1,%w2
+ maxi_u.<msafmt>\t%w0,%w1,%B2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_min_a_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (if_then_else
+ (lt (abs:IMSA (match_operand:IMSA 1 "register_operand" "f"))
+ (abs:IMSA (match_operand:IMSA 2 "register_operand" "f")))
+ (match_dup 1)
+ (match_dup 2)))]
+ "ISA_HAS_MSA"
+ "min_a.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "smin<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (smin:IMSA (match_operand:IMSA 1 "register_operand" "f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_simm5_operand" "f,Usv5")))]
+ "ISA_HAS_MSA"
+ "@
+ min_s.<msafmt>\t%w0,%w1,%w2
+ mini_s.<msafmt>\t%w0,%w1,%B2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "umin<mode>3"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (umin:IMSA (match_operand:IMSA 1 "register_operand" "f,f")
+ (match_operand:IMSA 2 "reg_or_vector_same_uimm5_operand" "f,Uuv5")))]
+ "ISA_HAS_MSA"
+ "@
+ min_u.<msafmt>\t%w0,%w1,%w2
+ mini_u.<msafmt>\t%w0,%w1,%B2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_msub_q_<msafmt>"
+ [(set (match_operand:IMSA_WH 0 "register_operand" "=f")
+ (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "0")
+ (match_operand:IMSA_WH 2 "register_operand" "f")
+ (match_operand:IMSA_WH 3 "register_operand" "f")]
+ UNSPEC_MSA_MSUB_Q))]
+ "ISA_HAS_MSA"
+ "msub_q.<msafmt>\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_msubr_q_<msafmt>"
+ [(set (match_operand:IMSA_WH 0 "register_operand" "=f")
+ (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "0")
+ (match_operand:IMSA_WH 2 "register_operand" "f")
+ (match_operand:IMSA_WH 3 "register_operand" "f")]
+ UNSPEC_MSA_MSUBR_Q))]
+ "ISA_HAS_MSA"
+ "msubr_q.<msafmt>\t%w0,%w2,%w3"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_mul_q_<msafmt>"
+ [(set (match_operand:IMSA_WH 0 "register_operand" "=f")
+ (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "f")
+ (match_operand:IMSA_WH 2 "register_operand" "f")]
+ UNSPEC_MSA_MUL_Q))]
+ "ISA_HAS_MSA"
+ "mul_q.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_mulr_q_<msafmt>"
+ [(set (match_operand:IMSA_WH 0 "register_operand" "=f")
+ (unspec:IMSA_WH [(match_operand:IMSA_WH 1 "register_operand" "f")
+ (match_operand:IMSA_WH 2 "register_operand" "f")]
+ UNSPEC_MSA_MULR_Q))]
+ "ISA_HAS_MSA"
+ "mulr_q.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_mul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_nloc_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")]
+ UNSPEC_MSA_NLOC))]
+ "ISA_HAS_MSA"
+ "nloc.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "clz<mode>2"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (clz:IMSA (match_operand:IMSA 1 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "nlzc.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_bit")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_nor_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f,f")
+ (and:IMSA (not:IMSA (match_operand:IMSA 1 "register_operand" "f,f"))
+ (not:IMSA (match_operand:IMSA 2 "reg_or_vector_same_val_operand" "f,Urv8"))))]
+ "ISA_HAS_MSA"
+ "@
+ nor.v\t%w0,%w1,%w2
+ nori.b\t%w0,%w1,%B2"
+ [(set_attr "type" "simd_logic")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_pckev_b"
+ [(set (match_operand:V16QI 0 "register_operand" "=f")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "f")
+ (match_operand:V16QI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)
+ (const_int 16) (const_int 18)
+ (const_int 20) (const_int 22)
+ (const_int 24) (const_int 26)
+ (const_int 28) (const_int 30)])))]
+ "ISA_HAS_MSA"
+ "pckev.b\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16QI")])
+
+(define_insn "msa_pckev_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "f")
+ (match_operand:V8HI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)
+ (const_int 8) (const_int 10)
+ (const_int 12) (const_int 14)])))]
+ "ISA_HAS_MSA"
+ "pckev.h\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8HI")])
+
+(define_insn "msa_pckev_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "f")
+ (match_operand:V4SI 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))]
+ "ISA_HAS_MSA"
+ "pckev.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_pckev_w_f"
+ [(set (match_operand:V4SF 0 "register_operand" "=f")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "f")
+ (match_operand:V4SF 2 "register_operand" "f"))
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))]
+ "ISA_HAS_MSA"
+ "pckev.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "msa_pckod_b"
+ [(set (match_operand:V16QI 0 "register_operand" "=f")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "f")
+ (match_operand:V16QI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)
+ (const_int 17) (const_int 19)
+ (const_int 21) (const_int 23)
+ (const_int 25) (const_int 27)
+ (const_int 29) (const_int 31)])))]
+ "ISA_HAS_MSA"
+ "pckod.b\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V16QI")])
+
+(define_insn "msa_pckod_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "f")
+ (match_operand:V8HI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)
+ (const_int 9) (const_int 11)
+ (const_int 13) (const_int 15)])))]
+ "ISA_HAS_MSA"
+ "pckod.h\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V8HI")])
+
+(define_insn "msa_pckod_w"
+ [(set (match_operand:V4SI 0 "register_operand" "=f")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "f")
+ (match_operand:V4SI 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))]
+ "ISA_HAS_MSA"
+ "pckod.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SI")])
+
+(define_insn "msa_pckod_w_f"
+ [(set (match_operand:V4SF 0 "register_operand" "=f")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "f")
+ (match_operand:V4SF 2 "register_operand" "f"))
+ (parallel [(const_int 1) (const_int 3)
+ (const_int 5) (const_int 7)])))]
+ "ISA_HAS_MSA"
+ "pckod.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_permute")
+ (set_attr "mode" "V4SF")])
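+
+;; pckev/pckod pack the even/odd numbered elements of both sources
+;; into a single vector; e.g. pckev.w yields, in RTL operand order,
+;; dst = { a[0], a[2], b[0], b[2] }.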
+
+(define_insn "popcount<mode>2"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (popcount:IMSA (match_operand:IMSA 1 "register_operand" "f")))]
+ "ISA_HAS_MSA"
+ "pcnt.<msafmt>\t%w0,%w1"
+ [(set_attr "type" "simd_pcnt")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_sat_s_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm>_operand" "")]
+ UNSPEC_MSA_SAT_S))]
+ "ISA_HAS_MSA"
+ "sat_s.<msafmt>\t%w0,%w1,%2"
+ [(set_attr "type" "simd_sat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_sat_u_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm>_operand" "")]
+ UNSPEC_MSA_SAT_U))]
+ "ISA_HAS_MSA"
+ "sat_u.<msafmt>\t%w0,%w1,%2"
+ [(set_attr "type" "simd_sat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_shf_<msafmt_f>"
+ [(set (match_operand:MSA_WHB_W 0 "register_operand" "=f")
+ (vec_select:MSA_WHB_W
+ (match_operand:MSA_WHB_W 1 "register_operand" "f")
+ (match_operand 2 "par_const_vector_shf_set_operand" "")))]
+ "ISA_HAS_MSA"
+{
+ HOST_WIDE_INT val = 0;
+ unsigned int i;
+
+ /* We convert the selection to an immediate. */
+ for (i = 0; i < 4; i++)
+ val |= INTVAL (XVECEXP (operands[2], 0, i)) << (2 * i);
+
+ operands[2] = GEN_INT (val);
+ return "shf.<msafmt>\t%w0,%w1,%X2";
+}
+ [(set_attr "type" "simd_shf")
+ (set_attr "mode" "<MODE>")])
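+
+;; For example (an illustrative sketch, not part of the original pattern):
+;; for V4SImode, the selection (parallel [(const_int 1) (const_int 0)
+;; (const_int 3) (const_int 2)]) packs as 1 | (0 << 2) | (3 << 4) | (2 << 6)
+;; = 0xb1, so the template prints "shf.w $w0,$w1,0xb1".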
+
+(define_insn "msa_srar_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_SRAR))]
+ "ISA_HAS_MSA"
+ "srar.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_srari_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm>_operand" "")]
+ UNSPEC_MSA_SRARI))]
+ "ISA_HAS_MSA"
+ "srari.<msafmt>\t%w0,%w1,%2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_srlr_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_SRLR))]
+ "ISA_HAS_MSA"
+ "srlr.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_srlri_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand 2 "const_<bitimm>_operand" "")]
+ UNSPEC_MSA_SRLRI))]
+ "ISA_HAS_MSA"
+ "srlri.<msafmt>\t%w0,%w1,%2"
+ [(set_attr "type" "simd_shift")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_subs_s_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_SUBS_S))]
+ "ISA_HAS_MSA"
+ "subs_s.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_subs_u_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_SUBS_U))]
+ "ISA_HAS_MSA"
+ "subs_u.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_subsuu_s_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_SUBSUU_S))]
+ "ISA_HAS_MSA"
+ "subsuu_s.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_subsus_u_<msafmt>"
+ [(set (match_operand:IMSA 0 "register_operand" "=f")
+ (unspec:IMSA [(match_operand:IMSA 1 "register_operand" "f")
+ (match_operand:IMSA 2 "register_operand" "f")]
+ UNSPEC_MSA_SUBSUS_U))]
+ "ISA_HAS_MSA"
+ "subsus_u.<msafmt>\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_int_arith")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_sld_<msafmt_f>"
+ [(set (match_operand:MSA 0 "register_operand" "=f")
+ (unspec:MSA [(match_operand:MSA 1 "register_operand" "0")
+ (match_operand:MSA 2 "register_operand" "f")
+ (match_operand:SI 3 "reg_or_0_operand" "dJ")]
+ UNSPEC_MSA_SLD))]
+ "ISA_HAS_MSA"
+ "sld.<msafmt>\t%w0,%w2[%z3]"
+ [(set_attr "type" "simd_sld")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_sldi_<msafmt_f>"
+ [(set (match_operand:MSA 0 "register_operand" "=f")
+ (unspec:MSA [(match_operand:MSA 1 "register_operand" "0")
+ (match_operand:MSA 2 "register_operand" "f")
+ (match_operand 3 "const_<indeximm>_operand" "")]
+ UNSPEC_MSA_SLDI))]
+ "ISA_HAS_MSA"
+ "sldi.<msafmt>\t%w0,%w2[%3]"
+ [(set_attr "type" "simd_sld")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_splat_<msafmt_f>"
+ [(set (match_operand:MSA 0 "register_operand" "=f")
+ (unspec:MSA [(match_operand:MSA 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPEC_MSA_SPLAT))]
+ "ISA_HAS_MSA"
+ "splat.<msafmt>\t%w0,%w1[%z2]"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_splati_<msafmt_f>"
+ [(set (match_operand:MSA 0 "register_operand" "=f")
+ (vec_duplicate:MSA
+ (vec_select:<UNITMODE>
+ (match_operand:MSA 1 "register_operand" "f")
+ (parallel [(match_operand 2 "const_<indeximm>_operand" "")]))))]
+ "ISA_HAS_MSA"
+ "splati.<msafmt>\t%w0,%w1[%2]"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_splati_<msafmt_f>_scalar"
+ [(set (match_operand:FMSA 0 "register_operand" "=f")
+ (unspec:FMSA [(match_operand:<UNITMODE> 1 "register_operand" "f")]
+ UNSPEC_MSA_SPLATI))]
+ "ISA_HAS_MSA"
+ "splati.<msafmt>\t%w0,%w1[0]"
+ [(set_attr "type" "simd_splat")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_cfcmsa"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec_volatile:SI [(match_operand 1 "const_uimm5_operand" "")]
+ UNSPEC_MSA_CFCMSA))]
+ "ISA_HAS_MSA"
+ "cfcmsa\t%0,$%1"
+ [(set_attr "type" "simd_cmsa")
+ (set_attr "mode" "SI")])
+
+(define_insn "msa_ctcmsa"
+ [(unspec_volatile [(match_operand 0 "const_uimm5_operand" "")
+ (match_operand:SI 1 "register_operand" "d")]
+ UNSPEC_MSA_CTCMSA)]
+ "ISA_HAS_MSA"
+ "ctcmsa\t$%0,%1"
+ [(set_attr "type" "simd_cmsa")
+ (set_attr "mode" "SI")])
+
+(define_insn "msa_fexdo_h"
+ [(set (match_operand:V8HI 0 "register_operand" "=f")
+ (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "f")
+ (match_operand:V4SF 2 "register_operand" "f")]
+ UNSPEC_MSA_FEXDO))]
+ "ISA_HAS_MSA"
+ "fexdo.h\t%w0,%w1,%w2"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V8HI")])
+
+(define_insn "vec_pack_trunc_v2df"
+ [(set (match_operand:V4SF 0 "register_operand" "=f")
+ (vec_concat:V4SF
+ (float_truncate:V2SF (match_operand:V2DF 1 "register_operand" "f"))
+ (float_truncate:V2SF (match_operand:V2DF 2 "register_operand" "f"))))]
+ "ISA_HAS_MSA"
+ "fexdo.w\t%w0,%w2,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "msa_fexupl_w"
+ [(set (match_operand:V4SF 0 "register_operand" "=f")
+ (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "f")]
+ UNSPEC_MSA_FEXUPL))]
+ "ISA_HAS_MSA"
+ "fexupl.w\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "msa_fexupl_d"
+ [(set (match_operand:V2DF 0 "register_operand" "=f")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "register_operand" "f")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "ISA_HAS_MSA"
+ "fexupl.d\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "msa_fexupr_w"
+ [(set (match_operand:V4SF 0 "register_operand" "=f")
+ (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "f")]
+ UNSPEC_MSA_FEXUPR))]
+ "ISA_HAS_MSA"
+ "fexupr.w\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "msa_fexupr_d"
+ [(set (match_operand:V2DF 0 "register_operand" "=f")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "register_operand" "f")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "ISA_HAS_MSA"
+ "fexupr.d\t%w0,%w1"
+ [(set_attr "type" "simd_fcvt")
+ (set_attr "mode" "V2DF")])
+
+(define_code_attr msabr
+ [(eq "bz")
+ (ne "bnz")])
+
+(define_code_attr msabr_neg
+ [(eq "bnz")
+ (ne "bz")])
+
+(define_insn "msa_<msabr>_<msafmt_f>"
+ [(set (pc) (if_then_else
+ (equality_op
+ (unspec:SI [(match_operand:MSA 1 "register_operand" "f")]
+ UNSPEC_MSA_BRANCH)
+ (match_operand:SI 2 "const_0_operand"))
+ (label_ref (match_operand 0))
+ (pc)))]
+ "ISA_HAS_MSA"
+{
+ return mips_output_conditional_branch (insn, operands,
+ MIPS_BRANCH ("<msabr>.<msafmt>",
+ "%w1,%0"),
+ MIPS_BRANCH ("<msabr_neg>.<msafmt>",
+ "%w1,%0"));
+}
+ [(set_attr "type" "simd_branch")
+ (set_attr "mode" "<MODE>")
+ (set_attr "compact_form" "never")])
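+
+;; For example (an illustrative sketch): for an EQ test of a V8HImode
+;; register against zero, the pattern above emits "bz.h $w1,label"; the
+;; inverted "bnz.h" form is what mips_output_conditional_branch falls back
+;; to when the branch target is out of range.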
+
+(define_insn "msa_<msabr>_v_<msafmt_f>"
+ [(set (pc) (if_then_else
+ (equality_op
+ (unspec:SI [(match_operand:MSA 1 "register_operand" "f")]
+ UNSPEC_MSA_BRANCH_V)
+ (match_operand:SI 2 "const_0_operand"))
+ (label_ref (match_operand 0))
+ (pc)))]
+ "ISA_HAS_MSA"
+{
+ return mips_output_conditional_branch (insn, operands,
+ MIPS_BRANCH ("<msabr>.v", "%w1,%0"),
+ MIPS_BRANCH ("<msabr_neg>.v",
+ "%w1,%0"));
+}
+ [(set_attr "type" "simd_branch")
+ (set_attr "mode" "TI")
+ (set_attr "compact_form" "never")])
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 01aad8295b3..09cf6626e27 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -197,8 +197,9 @@ extern bool mips_stack_address_p (rtx, machine_mode);
extern int mips_address_insns (rtx, machine_mode, bool);
extern int mips_const_insns (rtx);
extern int mips_split_const_insns (rtx);
+extern int mips_split_128bit_const_insns (rtx);
extern int mips_load_store_insns (rtx, rtx_insn *);
-extern int mips_idiv_insns (void);
+extern int mips_idiv_insns (machine_mode);
extern rtx_insn *mips_emit_move (rtx, rtx);
#ifdef RTX_CODE
extern void mips_emit_binary (enum rtx_code, rtx, rtx, rtx);
@@ -216,6 +217,11 @@ extern bool mips_split_move_p (rtx, rtx, enum mips_split_type);
extern void mips_split_move (rtx, rtx, enum mips_split_type);
extern bool mips_split_move_insn_p (rtx, rtx, rtx);
extern void mips_split_move_insn (rtx, rtx, rtx);
+extern void mips_split_128bit_move (rtx, rtx);
+extern bool mips_split_128bit_move_p (rtx, rtx);
+extern void mips_split_msa_copy_d (rtx, rtx, rtx, rtx (*)(rtx, rtx, rtx));
+extern void mips_split_msa_insert_d (rtx, rtx, rtx, rtx);
+extern void mips_split_msa_fill_d (rtx, rtx);
extern const char *mips_output_move (rtx, rtx);
extern bool mips_cfun_has_cprestore_slot_p (void);
extern bool mips_cprestore_address_p (rtx, bool);
@@ -278,6 +284,15 @@ extern void mips_expand_before_return (void);
extern void mips_expand_epilogue (bool);
extern bool mips_can_use_return_insn (void);
+extern bool mips_const_vector_same_val_p (rtx, machine_mode);
+extern bool mips_const_vector_same_bytes_p (rtx, machine_mode);
+extern bool mips_const_vector_same_int_p (rtx, machine_mode, HOST_WIDE_INT,
+ HOST_WIDE_INT);
+extern bool mips_const_vector_shuffle_set_p (rtx, machine_mode);
+extern bool mips_const_vector_bitimm_set_p (rtx, machine_mode);
+extern bool mips_const_vector_bitimm_clr_p (rtx, machine_mode);
+extern rtx mips_msa_vec_parallel_const_half (machine_mode, bool);
+extern rtx mips_gen_const_int_vector (machine_mode, int);
extern bool mips_secondary_memory_needed (enum reg_class, enum reg_class,
machine_mode);
extern bool mips_cannot_change_mode_class (machine_mode,
@@ -305,6 +320,7 @@ extern const char *mips_output_sync (void);
extern const char *mips_output_sync_loop (rtx_insn *, rtx *);
extern unsigned int mips_sync_loop_insns (rtx_insn *, rtx *);
extern const char *mips_output_division (const char *, rtx *);
+extern const char *mips_msa_output_division (const char *, rtx *);
extern const char *mips_output_probe_stack_range (rtx, rtx);
extern bool mips_hard_regno_rename_ok (unsigned int, unsigned int);
extern unsigned int mips_hard_regno_nregs (int, machine_mode);
@@ -343,6 +359,7 @@ extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx));
extern void mips_expand_vec_minmax (rtx, rtx, rtx,
rtx (*) (rtx, rtx, rtx), bool);
+extern int mips_ldst_scaled_shift (machine_mode);
extern bool mips_signed_immediate_p (unsigned HOST_WIDE_INT, int, int);
extern bool mips_unsigned_immediate_p (unsigned HOST_WIDE_INT, int, int);
extern const char *umips_output_save_restore (bool, rtx);
@@ -372,5 +389,6 @@ extern mulsidi3_gen_fn mips_mulsidi3_gen_fn (enum rtx_code);
#endif
extern void mips_register_frame_header_opt (void);
+extern void mips_expand_vec_cond_expr (machine_mode, machine_mode, rtx *);
#endif /* ! GCC_MIPS_PROTOS_H */
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 399f231791d..06acd30ec25 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -244,6 +244,10 @@ enum mips_builtin_type {
/* As above, but the instruction only sets a single $fcc register. */
MIPS_BUILTIN_CMP_SINGLE,
+ /* The function corresponds to an MSA conditional branch instruction
+ combined with a compare instruction. */
+ MIPS_BUILTIN_MSA_TEST_BRANCH,
+
/* For generating bposge32 branch instructions in MIPS32 DSP ASE. */
MIPS_BUILTIN_BPOSGE32
};
@@ -1126,6 +1130,7 @@ static int mips_register_move_cost (machine_mode, reg_class_t,
reg_class_t);
static unsigned int mips_function_arg_boundary (machine_mode, const_tree);
static machine_mode mips_get_reg_raw_mode (int regno);
+static rtx mips_gen_const_int_vector_shuffle (machine_mode, int);
/* This hash table keeps track of implicit "mips16" and "nomips16" attributes
for -mflip_mips16. It maps decl names onto a boolean mode setting. */
@@ -1835,6 +1840,140 @@ mips_symbol_binds_local_p (const_rtx x)
: SYMBOL_REF_LOCAL_P (x));
}
+/* Return true if OP is a constant vector of MODE in which every element
+ has the same single bit set. */
+
+bool
+mips_const_vector_bitimm_set_p (rtx op, machine_mode mode)
+{
+ if (GET_CODE (op) == CONST_VECTOR && op != CONST0_RTX (mode))
+ {
+ unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
+ int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));
+
+ if (vlog2 != -1)
+ {
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
+ gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
+ return mips_const_vector_same_val_p (op, mode);
+ }
+ }
+
+ return false;
+}
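+
+/* For example (illustrative, not part of the patch): the V4SImode
+ CONST_VECTOR {8, 8, 8, 8} passes mips_const_vector_bitimm_set_p, since
+ exact_log2 (8) == 3, which lets the bit-set immediate patterns use an
+ immediate of 3. */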
+
+/* Return true if OP is a constant vector of MODE in which every element
+ has the same single bit clear. */
+
+bool
+mips_const_vector_bitimm_clr_p (rtx op, machine_mode mode)
+{
+ if (GET_CODE (op) == CONST_VECTOR && op != CONSTM1_RTX (mode))
+ {
+ unsigned HOST_WIDE_INT val = ~UINTVAL (CONST_VECTOR_ELT (op, 0));
+ int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));
+
+ if (vlog2 != -1)
+ {
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
+ gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
+ return mips_const_vector_same_val_p (op, mode);
+ }
+ }
+
+ return false;
+}
+
+/* Return true if OP is a constant vector of MODE in which every element
+ has the same value. */
+
+bool
+mips_const_vector_same_val_p (rtx op, machine_mode mode)
+{
+ int i, nunits = GET_MODE_NUNITS (mode);
+ rtx first;
+
+ if (GET_CODE (op) != CONST_VECTOR || GET_MODE (op) != mode)
+ return false;
+
+ first = CONST_VECTOR_ELT (op, 0);
+ for (i = 1; i < nunits; i++)
+ if (!rtx_equal_p (first, CONST_VECTOR_ELT (op, i)))
+ return false;
+
+ return true;
+}
+
+/* Return true if OP is a constant vector of MODE in which every element
+ has the same value, and that value consists of a single repeated
+ byte. */
+
+bool
+mips_const_vector_same_bytes_p (rtx op, machine_mode mode)
+{
+ int i, bytes;
+ HOST_WIDE_INT val, first_byte;
+ rtx first;
+
+ if (!mips_const_vector_same_val_p (op, mode))
+ return false;
+
+ first = CONST_VECTOR_ELT (op, 0);
+ bytes = GET_MODE_UNIT_SIZE (mode);
+ val = INTVAL (first);
+ first_byte = val & 0xff;
+ for (i = 1; i < bytes; i++)
+ {
+ val >>= 8;
+ if ((val & 0xff) != first_byte)
+ return false;
+ }
+
+ return true;
+}
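+
+/* For example (illustrative): a V8HImode CONST_VECTOR whose elements are
+ all 0x2323 passes mips_const_vector_same_bytes_p, because the 16-bit
+ value is the byte 0x23 repeated, while elements of 0x1234 fail. */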
+
+/* Return true if OP is a constant vector of MODE in which every element
+ is the same integer in the range [LOW, HIGH]. */
+
+bool
+mips_const_vector_same_int_p (rtx op, machine_mode mode, HOST_WIDE_INT low,
+ HOST_WIDE_INT high)
+{
+ HOST_WIDE_INT value;
+ rtx elem0;
+
+ if (!mips_const_vector_same_val_p (op, mode))
+ return false;
+
+ elem0 = CONST_VECTOR_ELT (op, 0);
+ if (!CONST_INT_P (elem0))
+ return false;
+
+ value = INTVAL (elem0);
+ return (value >= low && value <= high);
+}
+
+/* Return true if OP is a constant vector with repeated 4-element sets
+ in mode MODE. */
+
+bool
+mips_const_vector_shuffle_set_p (rtx op, machine_mode mode)
+{
+ int nunits = GET_MODE_NUNITS (mode);
+ int nsets = nunits / 4;
+ int set = 0;
+ int i, j;
+
+ /* Check if we have the same 4-element sets. */
+ for (j = 0; j < nsets; j++, set = 4 * j)
+ for (i = 0; i < 4; i++)
+ if ((INTVAL (XVECEXP (op, 0, i))
+ != (INTVAL (XVECEXP (op, 0, set + i)) - set))
+ || !IN_RANGE (INTVAL (XVECEXP (op, 0, set + i)), 0, set + 3))
+ return false;
+ return true;
+}
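+
+/* For example (illustrative): for V8HImode, the selection
+ {1, 0, 3, 2, 5, 4, 7, 6} is accepted, since the second 4-element set is
+ the first shifted up by 4, whereas {1, 0, 3, 2, 4, 5, 7, 6} is
+ rejected. */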
+
/* Return true if rtx constants of mode MODE should be put into a small
data section. */
@@ -2206,6 +2345,11 @@ mips_symbol_insns_1 (enum mips_symbol_type type, machine_mode mode)
static int
mips_symbol_insns (enum mips_symbol_type type, machine_mode mode)
{
+ /* MSA LD.* and ST.* do not support loading symbols via an immediate
+ operand. */
+ if (MSA_SUPPORTED_MODE_P (mode))
+ return 0;
+
return mips_symbol_insns_1 (type, mode) * (TARGET_MIPS16 ? 2 : 1);
}
@@ -2325,6 +2469,12 @@ mips_valid_offset_p (rtx x, machine_mode mode)
&& !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
return false;
+ /* MSA LD.* and ST.* support 10-bit signed offsets. */
+ if (MSA_SUPPORTED_MODE_P (mode)
+ && !mips_signed_immediate_p (INTVAL (x), 10,
+ mips_ldst_scaled_shift (mode)))
+ return false;
+
return true;
}
@@ -2351,6 +2501,10 @@ mips_valid_lo_sum_p (enum mips_symbol_type symbol_type, machine_mode mode)
&& GET_MODE_BITSIZE (mode) > GET_MODE_ALIGNMENT (mode))
return false;
+ /* MSA LD.* and ST.* do not support loading symbols via %lo($base). */
+ if (MSA_SUPPORTED_MODE_P (mode))
+ return false;
+
return true;
}
@@ -2480,6 +2634,8 @@ mips_lx_address_p (rtx addr, machine_mode mode)
return true;
if (ISA_HAS_LDX && mode == DImode)
return true;
+ if (MSA_SUPPORTED_MODE_P (mode))
+ return true;
return false;
}
@@ -2517,6 +2673,7 @@ mips_address_insns (rtx x, machine_mode mode, bool might_split_p)
{
struct mips_address_info addr;
int factor;
+ bool msa_p = (!might_split_p && MSA_SUPPORTED_MODE_P (mode));
/* BLKmode is used for single unaligned loads and stores and should
not count as a multiword mode. (GET_MODE_SIZE (BLKmode) is pretty
@@ -2531,6 +2688,15 @@ mips_address_insns (rtx x, machine_mode mode, bool might_split_p)
switch (addr.type)
{
case ADDRESS_REG:
+ if (msa_p)
+ {
+ /* MSA LD.* and ST.* support 10-bit signed offsets. */
+ if (mips_signed_immediate_p (INTVAL (addr.offset), 10,
+ mips_ldst_scaled_shift (mode)))
+ return 1;
+ else
+ return 0;
+ }
if (TARGET_MIPS16
&& !mips16_unextended_reference_p (mode, addr.reg,
UINTVAL (addr.offset)))
@@ -2538,13 +2704,13 @@ mips_address_insns (rtx x, machine_mode mode, bool might_split_p)
return factor;
case ADDRESS_LO_SUM:
- return TARGET_MIPS16 ? factor * 2 : factor;
+ return msa_p ? 0 : TARGET_MIPS16 ? factor * 2 : factor;
case ADDRESS_CONST_INT:
- return factor;
+ return msa_p ? 0 : factor;
case ADDRESS_SYMBOLIC:
- return factor * mips_symbol_insns (addr.symbol_type, mode);
+ return msa_p ? 0 : factor * mips_symbol_insns (addr.symbol_type, mode);
}
return 0;
}
@@ -2568,6 +2734,19 @@ mips_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits, int shift = 0)
return mips_unsigned_immediate_p (x, bits, shift);
}
+/* Return the scale shift applied to the offset of an MSA LD/ST address. */
+
+int
+mips_ldst_scaled_shift (machine_mode mode)
+{
+ int shift = exact_log2 (GET_MODE_UNIT_SIZE (mode));
+
+ if (shift < 0 || shift > 8)
+ gcc_unreachable ();
+
+ return shift;
+}
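+
+/* For example (illustrative): for V8HImode the unit size is 2, so the
+ shift is 1 and a 10-bit signed scaled offset covers byte offsets
+ -1024 .. 1022 in steps of 2. */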
+
/* Return true if X is legitimate for accessing values of mode MODE,
if it is based on a MIPS16 register, and if the offset satisfies
OFFSET_PREDICATE. */
@@ -2663,8 +2842,12 @@ mips_const_insns (rtx x)
return mips_build_integer (codes, INTVAL (x));
- case CONST_DOUBLE:
case CONST_VECTOR:
+ if (ISA_HAS_MSA
+ && mips_const_vector_same_int_p (x, GET_MODE (x), -512, 511))
+ return 1;
+ /* Fall through. */
+ case CONST_DOUBLE:
/* Allow zeros for normal mode, where we can use $0. */
return !TARGET_MIPS16 && x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
@@ -2724,6 +2907,26 @@ mips_split_const_insns (rtx x)
return low + high;
}
+/* Return one word of the 128-bit value OP, taking into account the fixed
+ endianness of certain registers. BYTE is the byte offset of the word
+ to return. */
+
+rtx
+mips_subword_at_byte (rtx op, unsigned int byte)
+{
+ machine_mode mode;
+
+ mode = GET_MODE (op);
+ if (mode == VOIDmode)
+ mode = TImode;
+
+ gcc_assert (!FP_REG_RTX_P (op));
+
+ if (MEM_P (op))
+ return mips_rewrite_small_data (adjust_address (op, word_mode, byte));
+
+ return simplify_gen_subreg (word_mode, op, mode, byte);
+}
+
/* Return the number of instructions needed to implement INSN,
given that it loads from or stores to MEM. Assume that
BASE_INSN_LENGTH is the length of one instruction. */
@@ -2754,14 +2957,14 @@ mips_load_store_insns (rtx mem, rtx_insn *insn)
assuming that BASE_INSN_LENGTH is the length of one instruction. */
int
-mips_idiv_insns (void)
+mips_idiv_insns (machine_mode mode)
{
int count;
count = 1;
if (TARGET_CHECK_ZERO_DIV)
{
- if (GENERATE_DIVIDE_TRAPS)
+ if (GENERATE_DIVIDE_TRAPS && !MSA_SUPPORTED_MODE_P (mode))
count++;
else
count += 2;
@@ -2771,6 +2974,7 @@ mips_idiv_insns (void)
count++;
return count;
}
+
/* Emit a move from SRC to DEST. Assume that the move expanders can
handle all moves if !can_create_pseudo_p (). The distinction is
@@ -3478,7 +3682,14 @@ mips_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
bool
mips_legitimize_move (machine_mode mode, rtx dest, rtx src)
{
- if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
+ /* If both SRC and DEST are non-registers, force SRC into a register.
+ The one exception is storing (const_int 0): the store can then use
+ the zero register directly, which MIPS16 and MSA memory operations
+ can never do. */
+ if (!register_operand (dest, mode)
+ && !register_operand (src, mode)
+ && (TARGET_MIPS16 || !const_0_operand (src, mode)
+ || MSA_SUPPORTED_MODE_P (mode)))
{
mips_emit_move (dest, force_reg (mode, src));
return true;
@@ -4044,6 +4255,10 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
case NE:
case UNORDERED:
case LTGT:
+ case UNGE:
+ case UNGT:
+ case UNLE:
+ case UNLT:
/* Branch comparisons have VOIDmode, so use the first operand's
mode instead. */
mode = GET_MODE (XEXP (x, 0));
@@ -4208,7 +4423,7 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
*total += set_src_cost (XEXP (x, 0), mode, speed);
return true;
}
- *total = COSTS_N_INSNS (mips_idiv_insns ());
+ *total = COSTS_N_INSNS (mips_idiv_insns (mode));
}
else if (mode == DImode)
*total = mips_cost->int_div_di;
@@ -4514,6 +4729,10 @@ mips_split_move_p (rtx dest, rtx src, enum mips_split_type split_type)
return false;
}
+ /* Check if MSA moves need splitting. */
+ if (MSA_SUPPORTED_MODE_P (GET_MODE (dest)))
+ return mips_split_128bit_move_p (dest, src);
+
/* Otherwise split all multiword moves. */
return size > UNITS_PER_WORD;
}
@@ -4527,7 +4746,9 @@ mips_split_move (rtx dest, rtx src, enum mips_split_type split_type)
rtx low_dest;
gcc_checking_assert (mips_split_move_p (dest, src, split_type));
- if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
+ if (MSA_SUPPORTED_MODE_P (GET_MODE (dest)))
+ mips_split_128bit_move (dest, src);
+ else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
{
if (!TARGET_64BIT && GET_MODE (dest) == DImode)
emit_insn (gen_move_doubleword_fprdi (dest, src));
@@ -4600,6 +4821,199 @@ mips_insn_split_type (rtx insn)
return SPLIT_IF_NECESSARY;
}
+/* Return true if a 128-bit move from SRC to DEST should be split. */
+
+bool
+mips_split_128bit_move_p (rtx dest, rtx src)
+{
+ /* MSA-to-MSA moves can be done in a single instruction. */
+ if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
+ return false;
+
+ /* Check for MSA loads and stores. */
+ if (FP_REG_RTX_P (dest) && MEM_P (src))
+ return false;
+ if (FP_REG_RTX_P (src) && MEM_P (dest))
+ return false;
+
+ /* Check for an MSA load of an immediate const vector with a valid
+ replicated element (matched by LDI). */
+ if (FP_REG_RTX_P (dest)
+ && mips_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
+ return false;
+
+ /* Check for MSA load zero immediate. */
+ if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
+ return false;
+
+ return true;
+}
+
+/* Split a 128-bit move from SRC to DEST. */
+
+void
+mips_split_128bit_move (rtx dest, rtx src)
+{
+ int byte, index;
+ rtx low_dest, low_src, d, s;
+
+ if (FP_REG_RTX_P (dest))
+ {
+ gcc_assert (!MEM_P (src));
+
+ rtx new_dest = dest;
+ if (!TARGET_64BIT)
+ {
+ if (GET_MODE (dest) != V4SImode)
+ new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
+ }
+ else
+ {
+ if (GET_MODE (dest) != V2DImode)
+ new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0);
+ }
+
+ for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
+ byte += UNITS_PER_WORD, index++)
+ {
+ s = mips_subword_at_byte (src, byte);
+ if (!TARGET_64BIT)
+ emit_insn (gen_msa_insert_w (new_dest, s, new_dest,
+ GEN_INT (1 << index)));
+ else
+ emit_insn (gen_msa_insert_d (new_dest, s, new_dest,
+ GEN_INT (1 << index)));
+ }
+ }
+ else if (FP_REG_RTX_P (src))
+ {
+ gcc_assert (!MEM_P (dest));
+
+ rtx new_src = src;
+ if (!TARGET_64BIT)
+ {
+ if (GET_MODE (src) != V4SImode)
+ new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
+ }
+ else
+ {
+ if (GET_MODE (src) != V2DImode)
+ new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0);
+ }
+
+ for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
+ byte += UNITS_PER_WORD, index++)
+ {
+ d = mips_subword_at_byte (dest, byte);
+ if (!TARGET_64BIT)
+ emit_insn (gen_msa_copy_s_w (d, new_src, GEN_INT (index)));
+ else
+ emit_insn (gen_msa_copy_s_d (d, new_src, GEN_INT (index)));
+ }
+ }
+ else
+ {
+ low_dest = mips_subword_at_byte (dest, 0);
+ low_src = mips_subword_at_byte (src, 0);
+ gcc_assert (REG_P (low_dest) && REG_P (low_src));
+ /* Make sure the source register is not written before reading. */
+ if (REGNO (low_dest) <= REGNO (low_src))
+ {
+ for (byte = 0; byte < GET_MODE_SIZE (TImode);
+ byte += UNITS_PER_WORD)
+ {
+ d = mips_subword_at_byte (dest, byte);
+ s = mips_subword_at_byte (src, byte);
+ mips_emit_move (d, s);
+ }
+ }
+ else
+ {
+ for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
+ byte -= UNITS_PER_WORD)
+ {
+ d = mips_subword_at_byte (dest, byte);
+ s = mips_subword_at_byte (src, byte);
+ mips_emit_move (d, s);
+ }
+ }
+ }
+}
+
+/* Split a COPY_S.D with operands DEST, SRC and INDEX. GEN_FN generates
+ the word COPY instructions used for the two halves (for example
+ gen_msa_copy_s_w). */
+
+void
+mips_split_msa_copy_d (rtx dest, rtx src, rtx index,
+ rtx (*gen_fn)(rtx, rtx, rtx))
+{
+ gcc_assert ((GET_MODE (src) == V2DImode && GET_MODE (dest) == DImode)
+ || (GET_MODE (src) == V2DFmode && GET_MODE (dest) == DFmode));
+
+ /* Note that low is always from the lower index, and high is always
+ from the higher index. */
+ rtx low = mips_subword (dest, false);
+ rtx high = mips_subword (dest, true);
+ rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
+
+ emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2)));
+ emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index) * 2 + 1)));
+}
+
+/* Split an INSERT.D with operands DEST, SRC1, INDEX and SRC2. */
+
+void
+mips_split_msa_insert_d (rtx dest, rtx src1, rtx index, rtx src2)
+{
+ int i;
+ gcc_assert (GET_MODE (dest) == GET_MODE (src1));
+ gcc_assert ((GET_MODE (dest) == V2DImode
+ && (GET_MODE (src2) == DImode || src2 == const0_rtx))
+ || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode));
+
+ /* Note that low is always from the lower index, and high is always
+ from the higher index. */
+ rtx low = mips_subword (src2, false);
+ rtx high = mips_subword (src2, true);
+ rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
+ rtx new_src1 = simplify_gen_subreg (V4SImode, src1, GET_MODE (src1), 0);
+ i = exact_log2 (INTVAL (index));
+ gcc_assert (i != -1);
+
+ emit_insn (gen_msa_insert_w (new_dest, low, new_src1,
+ GEN_INT (1 << (i * 2))));
+ emit_insn (gen_msa_insert_w (new_dest, high, new_dest,
+ GEN_INT (1 << (i * 2 + 1))));
+}
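+
+/* For example (illustrative): inserting into double element 1 (INDEX ==
+ (1 << 1)) gives i == 1, so the two word inserts above target lanes 2
+ and 3 with masks (1 << 2) and (1 << 3). */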
+
+/* Split FILL.D. */
+
+void
+mips_split_msa_fill_d (rtx dest, rtx src)
+{
+ gcc_assert ((GET_MODE (dest) == V2DImode
+ && (GET_MODE (src) == DImode || src == const0_rtx))
+ || (GET_MODE (dest) == V2DFmode && GET_MODE (src) == DFmode));
+
+ /* Note that low is always from the lower index, and high is always
+ from the higher index. */
+ rtx low, high;
+ if (src == const0_rtx)
+ {
+ low = src;
+ high = src;
+ }
+ else
+ {
+ low = mips_subword (src, false);
+ high = mips_subword (src, true);
+ }
+ rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
+ emit_insn (gen_msa_fill_w (new_dest, low));
+ emit_insn (gen_msa_insert_w (new_dest, high, new_dest, GEN_INT (1 << 1)));
+ emit_insn (gen_msa_insert_w (new_dest, high, new_dest, GEN_INT (1 << 3)));
+}
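+
+/* For example (illustrative): a 64-bit fill on a 32-bit target becomes a
+ fill.w of the low word followed by insert.w of the high word into lanes
+ 1 and 3, replicating the doubleword across the register. */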
+
/* Return true if a move from SRC to DEST in INSN should be split. */
bool
@@ -4623,19 +5037,25 @@ mips_split_move_insn (rtx dest, rtx src, rtx insn)
const char *
mips_output_move (rtx dest, rtx src)
{
- enum rtx_code dest_code, src_code;
- machine_mode mode;
+ enum rtx_code dest_code = GET_CODE (dest);
+ enum rtx_code src_code = GET_CODE (src);
+ machine_mode mode = GET_MODE (dest);
+ bool dbl_p = (GET_MODE_SIZE (mode) == 8);
+ bool msa_p = MSA_SUPPORTED_MODE_P (mode);
enum mips_symbol_type symbol_type;
- bool dbl_p;
-
- dest_code = GET_CODE (dest);
- src_code = GET_CODE (src);
- mode = GET_MODE (dest);
- dbl_p = (GET_MODE_SIZE (mode) == 8);
if (mips_split_move_p (dest, src, SPLIT_IF_NECESSARY))
return "#";
+ if (msa_p
+ && dest_code == REG && FP_REG_P (REGNO (dest))
+ && src_code == CONST_VECTOR
+ && CONST_INT_P (CONST_VECTOR_ELT (src, 0)))
+ {
+ gcc_assert (mips_const_vector_same_int_p (src, mode, -512, 511));
+ return "ldi.%v0\t%w0,%E1";
+ }
+
if ((src_code == REG && GP_REG_P (REGNO (src)))
|| (!TARGET_MIPS16 && src == CONST0_RTX (mode)))
{
@@ -4666,7 +5086,15 @@ mips_output_move (rtx dest, rtx src)
}
if (FP_REG_P (REGNO (dest)))
- return dbl_p ? "dmtc1\t%z1,%0" : "mtc1\t%z1,%0";
+ {
+ if (msa_p)
+ {
+ gcc_assert (src == CONST0_RTX (GET_MODE (src)));
+ return "ldi.%v0\t%w0,0";
+ }
+
+ return dbl_p ? "dmtc1\t%z1,%0" : "mtc1\t%z1,%0";
+ }
if (ALL_COP_REG_P (REGNO (dest)))
{
@@ -4683,6 +5111,7 @@ mips_output_move (rtx dest, rtx src)
case 2: return "sh\t%z1,%0";
case 4: return "sw\t%z1,%0";
case 8: return "sd\t%z1,%0";
+ default: gcc_unreachable ();
}
}
if (dest_code == REG && GP_REG_P (REGNO (dest)))
@@ -4711,7 +5140,10 @@ mips_output_move (rtx dest, rtx src)
}
if (FP_REG_P (REGNO (src)))
- return dbl_p ? "dmfc1\t%0,%1" : "mfc1\t%0,%1";
+ {
+ gcc_assert (!msa_p);
+ return dbl_p ? "dmfc1\t%0,%1" : "mfc1\t%0,%1";
+ }
if (ALL_COP_REG_P (REGNO (src)))
{
@@ -4729,6 +5161,7 @@ mips_output_move (rtx dest, rtx src)
case 2: return "lhu\t%0,%1";
case 4: return "lw\t%0,%1";
case 8: return "ld\t%0,%1";
+ default: gcc_unreachable ();
}
if (src_code == CONST_INT)
@@ -4775,17 +5208,29 @@ mips_output_move (rtx dest, rtx src)
{
if (GET_MODE (dest) == V2SFmode)
return "mov.ps\t%0,%1";
+ else if (msa_p)
+ return "move.v\t%w0,%w1";
else
return dbl_p ? "mov.d\t%0,%1" : "mov.s\t%0,%1";
}
if (dest_code == MEM)
- return dbl_p ? "sdc1\t%1,%0" : "swc1\t%1,%0";
+ {
+ if (msa_p)
+ return "st.%v1\t%w1,%0";
+
+ return dbl_p ? "sdc1\t%1,%0" : "swc1\t%1,%0";
+ }
}
if (dest_code == REG && FP_REG_P (REGNO (dest)))
{
if (src_code == MEM)
- return dbl_p ? "ldc1\t%0,%1" : "lwc1\t%0,%1";
+ {
+ if (msa_p)
+ return "ld.%v0\t%w0,%1";
+
+ return dbl_p ? "ldc1\t%0,%1" : "lwc1\t%0,%1";
+ }
}
if (dest_code == REG && ALL_COP_REG_P (REGNO (dest)) && src_code == MEM)
{
@@ -8455,10 +8900,14 @@ mips_print_operand_punct_valid_p (unsigned char code)
/* Implement TARGET_PRINT_OPERAND. The MIPS-specific operand codes are:
+ 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal.
'X' Print CONST_INT OP in hexadecimal format.
'x' Print the low 16 bits of CONST_INT OP in hexadecimal format.
'd' Print CONST_INT OP in decimal.
+ 'B' Print CONST_INT OP element 0 of a replicated CONST_VECTOR
+ as an unsigned byte [0..255].
'm' Print one less than CONST_INT OP in decimal.
+ 'y' Print exact log2 of CONST_INT OP in decimal.
'h' Print the high-part relocation associated with OP, after stripping
any outermost HIGH.
'R' Print the low-part relocation associated with OP.
@@ -8466,6 +8915,7 @@ mips_print_operand_punct_valid_p (unsigned char code)
'N' Print the inverse of the integer branch condition for comparison OP.
'F' Print the FPU branch condition for comparison OP.
'W' Print the inverse of the FPU branch condition for comparison OP.
+ 'w' Print an MSA register.
'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
'z' for (eq:?I ...), 'n' for (ne:?I ...).
't' Like 'T', but with the EQ/NE cases reversed
@@ -8476,7 +8926,11 @@ mips_print_operand_punct_valid_p (unsigned char code)
'L' Print the low-order register in a double-word register operand.
'M' Print high-order register in a double-word register operand.
'z' Print $0 if OP is zero, otherwise print OP normally.
- 'b' Print the address of a memory operand, without offset. */
+ 'b' Print the address of a memory operand, without offset.
+ 'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
+ V4SI and V2DI, and w or d for vector modes V4SF and V2DF respectively.
+ 'V' Print exact log2 of CONST_INT OP element 0 of a replicated
+ CONST_VECTOR in decimal. */
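+
+/* For example (an illustrative sketch): with operands[0] a V8HImode MSA
+ register $w5 and operands[1] the replicated CONST_VECTOR {3, ..., 3},
+ the template "ldi.%v0\t%w0,%E1" prints as "ldi.h $w5,3". */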
static void
mips_print_operand (FILE *file, rtx op, int letter)
@@ -8494,6 +8948,18 @@ mips_print_operand (FILE *file, rtx op, int letter)
switch (letter)
{
+ case 'E':
+ if (GET_CODE (op) == CONST_VECTOR)
+ {
+ gcc_assert (mips_const_vector_same_val_p (op, GET_MODE (op)));
+ op = CONST_VECTOR_ELT (op, 0);
+ gcc_assert (CONST_INT_P (op));
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
+ }
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
case 'X':
if (CONST_INT_P (op))
fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op));
@@ -8515,6 +8981,19 @@ mips_print_operand (FILE *file, rtx op, int letter)
output_operand_lossage ("invalid use of '%%%c'", letter);
break;
+ case 'B':
+ if (GET_CODE (op) == CONST_VECTOR)
+ {
+ gcc_assert (mips_const_vector_same_val_p (op, GET_MODE (op)));
+ op = CONST_VECTOR_ELT (op, 0);
+ gcc_assert (CONST_INT_P (op));
+ unsigned HOST_WIDE_INT val8 = UINTVAL (op) & GET_MODE_MASK (QImode);
+ fprintf (file, HOST_WIDE_INT_PRINT_UNSIGNED, val8);
+ }
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
case 'm':
if (CONST_INT_P (op))
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) - 1);
@@ -8522,6 +9001,34 @@ mips_print_operand (FILE *file, rtx op, int letter)
output_operand_lossage ("invalid use of '%%%c'", letter);
break;
+ case 'y':
+ if (CONST_INT_P (op))
+ {
+ int val = exact_log2 (INTVAL (op));
+ if (val != -1)
+ fprintf (file, "%d", val);
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ }
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
+ case 'V':
+ if (GET_CODE (op) == CONST_VECTOR)
+ {
+ machine_mode mode = GET_MODE_INNER (GET_MODE (op));
+ unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
+ int vlog2 = exact_log2 (val & GET_MODE_MASK (mode));
+ if (vlog2 != -1)
+ fprintf (file, "%d", vlog2);
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ }
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
case 'h':
if (code == HIGH)
op = XEXP (op, 0);
@@ -8582,6 +9089,35 @@ mips_print_operand (FILE *file, rtx op, int letter)
output_operand_lossage ("invalid use of '%%%c'", letter);
break;
+ case 'w':
+ if (code == REG && MSA_REG_P (REGNO (op)))
+ fprintf (file, "$w%s", &reg_names[REGNO (op)][2]);
+ else
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+
+ case 'v':
+ switch (GET_MODE (op))
+ {
+ case V16QImode:
+ fprintf (file, "b");
+ break;
+ case V8HImode:
+ fprintf (file, "h");
+ break;
+ case V4SImode:
+ case V4SFmode:
+ fprintf (file, "w");
+ break;
+ case V2DImode:
+ case V2DFmode:
+ fprintf (file, "d");
+ break;
+ default:
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ }
+ break;
+
default:
switch (code)
{
@@ -9316,6 +9852,10 @@ mips_file_start (void)
attr = 1;
fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", attr);
+
+ /* 128-bit MSA. */
+ if (ISA_HAS_MSA)
+ fprintf (asm_out_file, "\t.gnu_attribute 8, 1\n");
}
#endif
#endif
@@ -12159,9 +12699,13 @@ mips_hard_regno_mode_ok_p (unsigned int regno, machine_mode mode)
size = GET_MODE_SIZE (mode);
mclass = GET_MODE_CLASS (mode);
- if (GP_REG_P (regno) && mode != CCFmode)
+ if (GP_REG_P (regno) && mode != CCFmode && !MSA_SUPPORTED_MODE_P (mode))
return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD;
+ /* For MSA, allow TImode and 128-bit vector modes in all FPRs. */
+ if (FP_REG_P (regno) && MSA_SUPPORTED_MODE_P (mode))
+ return true;
+
if (FP_REG_P (regno)
&& (((regno - FP_REG_FIRST) % MAX_FPRS_PER_FMT) == 0
|| (MIN_FPRS_PER_FMT == 1 && size <= UNITS_PER_FPREG)))
@@ -12277,7 +12821,12 @@ mips_hard_regno_nregs (int regno, machine_mode mode)
return (GET_MODE_SIZE (mode) + 3) / 4;
if (FP_REG_P (regno))
- return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG;
+ {
+ if (MSA_SUPPORTED_MODE_P (mode))
+ return 1;
+
+ return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG;
+ }
/* All other registers are word-sized. */
return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
@@ -12298,12 +12847,19 @@ mips_class_max_nregs (enum reg_class rclass, machine_mode mode)
{
if (HARD_REGNO_MODE_OK (ST_REG_FIRST, mode))
size = MIN (size, 4);
+
AND_COMPL_HARD_REG_SET (left, reg_class_contents[(int) ST_REGS]);
}
if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FP_REGS]))
{
if (HARD_REGNO_MODE_OK (FP_REG_FIRST, mode))
- size = MIN (size, UNITS_PER_FPREG);
+ {
+ if (MSA_SUPPORTED_MODE_P (mode))
+ size = MIN (size, UNITS_PER_MSA_REG);
+ else
+ size = MIN (size, UNITS_PER_FPREG);
+ }
+
AND_COMPL_HARD_REG_SET (left, reg_class_contents[(int) FP_REGS]);
}
if (!hard_reg_set_empty_p (left))
@@ -12324,6 +12880,10 @@ mips_cannot_change_mode_class (machine_mode from,
&& INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to))
return false;
+ /* Allow conversions between different MSA vector modes. */
+ if (MSA_SUPPORTED_MODE_P (from) && MSA_SUPPORTED_MODE_P (to))
+ return false;
+
/* Otherwise, there are several problems with changing the modes of
values in floating-point registers:
@@ -12359,7 +12919,8 @@ mips_small_register_classes_for_mode_p (machine_mode mode
return TARGET_MIPS16;
}
-/* Return true if moves in mode MODE can use the FPU's mov.fmt instruction. */
+/* Return true if moves in mode MODE can use the FPU's mov.fmt instruction
+ or the MSA move.v instruction. */
static bool
mips_mode_ok_for_mov_fmt_p (machine_mode mode)
@@ -12377,7 +12938,7 @@ mips_mode_ok_for_mov_fmt_p (machine_mode mode)
return TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT;
default:
- return false;
+ return MSA_SUPPORTED_MODE_P (mode);
}
}
@@ -12624,6 +13185,10 @@ mips_secondary_reload_class (enum reg_class rclass,
pairs of lwc1s and swc1s if ldc1 and sdc1 are not supported. */
return NO_REGS;
+ if (MEM_P (x) && MSA_SUPPORTED_MODE_P (mode))
+ /* In this case we can use MSA LD.* and ST.*. */
+ return NO_REGS;
+
if (GP_REG_P (regno) || x == CONST0_RTX (mode))
/* In this case we can use mtc1, mfc1, dmtc1 or dmfc1. */
return NO_REGS;
@@ -12693,7 +13258,7 @@ mips_vector_mode_supported_p (machine_mode mode)
return TARGET_LOONGSON_VECTORS;
default:
- return false;
+ return MSA_SUPPORTED_MODE_P (mode);
}
}
@@ -12712,14 +13277,46 @@ mips_scalar_mode_supported_p (machine_mode mode)
/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
static machine_mode
-mips_preferred_simd_mode (machine_mode mode ATTRIBUTE_UNUSED)
+mips_preferred_simd_mode (machine_mode mode)
{
if (TARGET_PAIRED_SINGLE_FLOAT
&& mode == SFmode)
return V2SFmode;
+
+ if (!ISA_HAS_MSA)
+ return word_mode;
+
+ switch (mode)
+ {
+ case QImode:
+ return V16QImode;
+ case HImode:
+ return V8HImode;
+ case SImode:
+ return V4SImode;
+ case DImode:
+ return V2DImode;
+
+ case SFmode:
+ return V4SFmode;
+
+ case DFmode:
+ return V2DFmode;
+
+ default:
+ break;
+ }
return word_mode;
}
+/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */
+
+static unsigned int
+mips_autovectorize_vector_sizes (void)
+{
+ return ISA_HAS_MSA ? 16 : 0;
+}
+
/* Implement TARGET_INIT_LIBFUNCS. */
static void
@@ -13727,6 +14324,25 @@ mips_output_division (const char *division, rtx *operands)
}
return s;
}
+
+/* Return the assembly code for MSA DIV_{S,U}.DF or MOD_{S,U}.DF instructions,
+ which have the operands given by OPERANDS. Add in a divide-by-zero check
+ if needed. */
+
+const char *
+mips_msa_output_division (const char *division, rtx *operands)
+{
+ const char *s;
+
+ s = division;
+ if (TARGET_CHECK_ZERO_DIV)
+ {
+ output_asm_insn ("%(bnz.%v0\t%w2,1f", operands);
+ output_asm_insn (s, operands);
+ s = "break\t7%)\n1:";
+ }
+ return s;
+}
/* Return true if destination of IN_INSN is used as add source in
OUT_INSN. Both IN_INSN and OUT_INSN are of type fmadd. Example:
@@ -14480,6 +15096,7 @@ AVAIL_NON_MIPS16 (dsp_64, TARGET_64BIT && TARGET_DSP)
AVAIL_NON_MIPS16 (dspr2_32, !TARGET_64BIT && TARGET_DSPR2)
AVAIL_NON_MIPS16 (loongson, TARGET_LOONGSON_VECTORS)
AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN)
+AVAIL_NON_MIPS16 (msa, TARGET_MSA)
/* Construct a mips_builtin_description from the given arguments.
@@ -14596,6 +15213,38 @@ AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN)
#define LOONGSON_BUILTIN_SUFFIX(INSN, SUFFIX, FUNCTION_TYPE) \
LOONGSON_BUILTIN_ALIAS (INSN, INSN ## _ ## SUFFIX, FUNCTION_TYPE)
+/* Define an MSA MIPS_BUILTIN_DIRECT function __builtin_msa_<INSN>
+ for instruction CODE_FOR_msa_<INSN>. FUNCTION_TYPE is a builtin_description
+ field. */
+#define MSA_BUILTIN(INSN, FUNCTION_TYPE) \
+ { CODE_FOR_msa_ ## INSN, MIPS_FP_COND_f, \
+ "__builtin_msa_" #INSN, MIPS_BUILTIN_DIRECT, \
+ FUNCTION_TYPE, mips_builtin_avail_msa }
+
+/* Define a remapped MSA MIPS_BUILTIN_DIRECT function __builtin_msa_<INSN>
+ for instruction CODE_FOR_msa_<INSN2>. FUNCTION_TYPE is
+ a builtin_description field. */
+#define MSA_BUILTIN_REMAP(INSN, INSN2, FUNCTION_TYPE) \
+ { CODE_FOR_msa_ ## INSN2, MIPS_FP_COND_f, \
+ "__builtin_msa_" #INSN, MIPS_BUILTIN_DIRECT, \
+ FUNCTION_TYPE, mips_builtin_avail_msa }
+
+/* Define an MSA MIPS_BUILTIN_MSA_TEST_BRANCH function __builtin_msa_<INSN>
+ for instruction CODE_FOR_msa_<INSN>. FUNCTION_TYPE is a builtin_description
+ field. */
+#define MSA_BUILTIN_TEST_BRANCH(INSN, FUNCTION_TYPE) \
+ { CODE_FOR_msa_ ## INSN, MIPS_FP_COND_f, \
+ "__builtin_msa_" #INSN, MIPS_BUILTIN_MSA_TEST_BRANCH, \
+ FUNCTION_TYPE, mips_builtin_avail_msa }
+
+/* Define an MSA MIPS_BUILTIN_DIRECT_NO_TARGET function __builtin_msa_<INSN>
+ for instruction CODE_FOR_msa_<INSN>. FUNCTION_TYPE is a builtin_description
+ field. */
+#define MSA_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE) \
+ { CODE_FOR_msa_ ## INSN, MIPS_FP_COND_f, \
+ "__builtin_msa_" #INSN, MIPS_BUILTIN_DIRECT_NO_TARGET, \
+ FUNCTION_TYPE, mips_builtin_avail_msa }
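+
+/* For example (illustrative): MSA_BUILTIN (addv_b,
+ MIPS_V16QI_FTYPE_V16QI_V16QI) expands to { CODE_FOR_msa_addv_b,
+ MIPS_FP_COND_f, "__builtin_msa_addv_b", MIPS_BUILTIN_DIRECT,
+ MIPS_V16QI_FTYPE_V16QI_V16QI, mips_builtin_avail_msa }, and
+ CODE_FOR_msa_addv_b is remapped to CODE_FOR_addv16qi3 below. */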
+
#define CODE_FOR_mips_sqrt_ps CODE_FOR_sqrtv2sf2
#define CODE_FOR_mips_addq_ph CODE_FOR_addv2hi3
#define CODE_FOR_mips_addu_qb CODE_FOR_addv4qi3
@@ -14636,6 +15285,203 @@ AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN)
#define CODE_FOR_loongson_psubush CODE_FOR_ussubv4hi3
#define CODE_FOR_loongson_psubusb CODE_FOR_ussubv8qi3
+#define CODE_FOR_msa_adds_s_b CODE_FOR_ssaddv16qi3
+#define CODE_FOR_msa_adds_s_h CODE_FOR_ssaddv8hi3
+#define CODE_FOR_msa_adds_s_w CODE_FOR_ssaddv4si3
+#define CODE_FOR_msa_adds_s_d CODE_FOR_ssaddv2di3
+#define CODE_FOR_msa_adds_u_b CODE_FOR_usaddv16qi3
+#define CODE_FOR_msa_adds_u_h CODE_FOR_usaddv8hi3
+#define CODE_FOR_msa_adds_u_w CODE_FOR_usaddv4si3
+#define CODE_FOR_msa_adds_u_d CODE_FOR_usaddv2di3
+#define CODE_FOR_msa_addv_b CODE_FOR_addv16qi3
+#define CODE_FOR_msa_addv_h CODE_FOR_addv8hi3
+#define CODE_FOR_msa_addv_w CODE_FOR_addv4si3
+#define CODE_FOR_msa_addv_d CODE_FOR_addv2di3
+#define CODE_FOR_msa_addvi_b CODE_FOR_addv16qi3
+#define CODE_FOR_msa_addvi_h CODE_FOR_addv8hi3
+#define CODE_FOR_msa_addvi_w CODE_FOR_addv4si3
+#define CODE_FOR_msa_addvi_d CODE_FOR_addv2di3
+#define CODE_FOR_msa_and_v CODE_FOR_andv16qi3
+#define CODE_FOR_msa_andi_b CODE_FOR_andv16qi3
+#define CODE_FOR_msa_bmnz_v CODE_FOR_msa_bmnz_b
+#define CODE_FOR_msa_bmnzi_b CODE_FOR_msa_bmnz_b
+#define CODE_FOR_msa_bmz_v CODE_FOR_msa_bmz_b
+#define CODE_FOR_msa_bmzi_b CODE_FOR_msa_bmz_b
+#define CODE_FOR_msa_bnz_v CODE_FOR_msa_bnz_v_b
+#define CODE_FOR_msa_bz_v CODE_FOR_msa_bz_v_b
+#define CODE_FOR_msa_bsel_v CODE_FOR_msa_bsel_b
+#define CODE_FOR_msa_bseli_b CODE_FOR_msa_bsel_b
+#define CODE_FOR_msa_ceqi_b CODE_FOR_msa_ceq_b
+#define CODE_FOR_msa_ceqi_h CODE_FOR_msa_ceq_h
+#define CODE_FOR_msa_ceqi_w CODE_FOR_msa_ceq_w
+#define CODE_FOR_msa_ceqi_d CODE_FOR_msa_ceq_d
+#define CODE_FOR_msa_clti_s_b CODE_FOR_msa_clt_s_b
+#define CODE_FOR_msa_clti_s_h CODE_FOR_msa_clt_s_h
+#define CODE_FOR_msa_clti_s_w CODE_FOR_msa_clt_s_w
+#define CODE_FOR_msa_clti_s_d CODE_FOR_msa_clt_s_d
+#define CODE_FOR_msa_clti_u_b CODE_FOR_msa_clt_u_b
+#define CODE_FOR_msa_clti_u_h CODE_FOR_msa_clt_u_h
+#define CODE_FOR_msa_clti_u_w CODE_FOR_msa_clt_u_w
+#define CODE_FOR_msa_clti_u_d CODE_FOR_msa_clt_u_d
+#define CODE_FOR_msa_clei_s_b CODE_FOR_msa_cle_s_b
+#define CODE_FOR_msa_clei_s_h CODE_FOR_msa_cle_s_h
+#define CODE_FOR_msa_clei_s_w CODE_FOR_msa_cle_s_w
+#define CODE_FOR_msa_clei_s_d CODE_FOR_msa_cle_s_d
+#define CODE_FOR_msa_clei_u_b CODE_FOR_msa_cle_u_b
+#define CODE_FOR_msa_clei_u_h CODE_FOR_msa_cle_u_h
+#define CODE_FOR_msa_clei_u_w CODE_FOR_msa_cle_u_w
+#define CODE_FOR_msa_clei_u_d CODE_FOR_msa_cle_u_d
+#define CODE_FOR_msa_div_s_b CODE_FOR_divv16qi3
+#define CODE_FOR_msa_div_s_h CODE_FOR_divv8hi3
+#define CODE_FOR_msa_div_s_w CODE_FOR_divv4si3
+#define CODE_FOR_msa_div_s_d CODE_FOR_divv2di3
+#define CODE_FOR_msa_div_u_b CODE_FOR_udivv16qi3
+#define CODE_FOR_msa_div_u_h CODE_FOR_udivv8hi3
+#define CODE_FOR_msa_div_u_w CODE_FOR_udivv4si3
+#define CODE_FOR_msa_div_u_d CODE_FOR_udivv2di3
+#define CODE_FOR_msa_fadd_w CODE_FOR_addv4sf3
+#define CODE_FOR_msa_fadd_d CODE_FOR_addv2df3
+#define CODE_FOR_msa_fexdo_w CODE_FOR_vec_pack_trunc_v2df
+#define CODE_FOR_msa_ftrunc_s_w CODE_FOR_fix_truncv4sfv4si2
+#define CODE_FOR_msa_ftrunc_s_d CODE_FOR_fix_truncv2dfv2di2
+#define CODE_FOR_msa_ftrunc_u_w CODE_FOR_fixuns_truncv4sfv4si2
+#define CODE_FOR_msa_ftrunc_u_d CODE_FOR_fixuns_truncv2dfv2di2
+#define CODE_FOR_msa_ffint_s_w CODE_FOR_floatv4siv4sf2
+#define CODE_FOR_msa_ffint_s_d CODE_FOR_floatv2div2df2
+#define CODE_FOR_msa_ffint_u_w CODE_FOR_floatunsv4siv4sf2
+#define CODE_FOR_msa_ffint_u_d CODE_FOR_floatunsv2div2df2
+#define CODE_FOR_msa_fsub_w CODE_FOR_subv4sf3
+#define CODE_FOR_msa_fsub_d CODE_FOR_subv2df3
+#define CODE_FOR_msa_fmadd_w CODE_FOR_fmav4sf4
+#define CODE_FOR_msa_fmadd_d CODE_FOR_fmav2df4
+#define CODE_FOR_msa_fmsub_w CODE_FOR_fnmav4sf4
+#define CODE_FOR_msa_fmsub_d CODE_FOR_fnmav2df4
+#define CODE_FOR_msa_fmul_w CODE_FOR_mulv4sf3
+#define CODE_FOR_msa_fmul_d CODE_FOR_mulv2df3
+#define CODE_FOR_msa_fdiv_w CODE_FOR_divv4sf3
+#define CODE_FOR_msa_fdiv_d CODE_FOR_divv2df3
+#define CODE_FOR_msa_fmax_w CODE_FOR_smaxv4sf3
+#define CODE_FOR_msa_fmax_d CODE_FOR_smaxv2df3
+#define CODE_FOR_msa_fmin_w CODE_FOR_sminv4sf3
+#define CODE_FOR_msa_fmin_d CODE_FOR_sminv2df3
+#define CODE_FOR_msa_fsqrt_w CODE_FOR_sqrtv4sf2
+#define CODE_FOR_msa_fsqrt_d CODE_FOR_sqrtv2df2
+#define CODE_FOR_msa_max_s_b CODE_FOR_smaxv16qi3
+#define CODE_FOR_msa_max_s_h CODE_FOR_smaxv8hi3
+#define CODE_FOR_msa_max_s_w CODE_FOR_smaxv4si3
+#define CODE_FOR_msa_max_s_d CODE_FOR_smaxv2di3
+#define CODE_FOR_msa_maxi_s_b CODE_FOR_smaxv16qi3
+#define CODE_FOR_msa_maxi_s_h CODE_FOR_smaxv8hi3
+#define CODE_FOR_msa_maxi_s_w CODE_FOR_smaxv4si3
+#define CODE_FOR_msa_maxi_s_d CODE_FOR_smaxv2di3
+#define CODE_FOR_msa_max_u_b CODE_FOR_umaxv16qi3
+#define CODE_FOR_msa_max_u_h CODE_FOR_umaxv8hi3
+#define CODE_FOR_msa_max_u_w CODE_FOR_umaxv4si3
+#define CODE_FOR_msa_max_u_d CODE_FOR_umaxv2di3
+#define CODE_FOR_msa_maxi_u_b CODE_FOR_umaxv16qi3
+#define CODE_FOR_msa_maxi_u_h CODE_FOR_umaxv8hi3
+#define CODE_FOR_msa_maxi_u_w CODE_FOR_umaxv4si3
+#define CODE_FOR_msa_maxi_u_d CODE_FOR_umaxv2di3
+#define CODE_FOR_msa_min_s_b CODE_FOR_sminv16qi3
+#define CODE_FOR_msa_min_s_h CODE_FOR_sminv8hi3
+#define CODE_FOR_msa_min_s_w CODE_FOR_sminv4si3
+#define CODE_FOR_msa_min_s_d CODE_FOR_sminv2di3
+#define CODE_FOR_msa_mini_s_b CODE_FOR_sminv16qi3
+#define CODE_FOR_msa_mini_s_h CODE_FOR_sminv8hi3
+#define CODE_FOR_msa_mini_s_w CODE_FOR_sminv4si3
+#define CODE_FOR_msa_mini_s_d CODE_FOR_sminv2di3
+#define CODE_FOR_msa_min_u_b CODE_FOR_uminv16qi3
+#define CODE_FOR_msa_min_u_h CODE_FOR_uminv8hi3
+#define CODE_FOR_msa_min_u_w CODE_FOR_uminv4si3
+#define CODE_FOR_msa_min_u_d CODE_FOR_uminv2di3
+#define CODE_FOR_msa_mini_u_b CODE_FOR_uminv16qi3
+#define CODE_FOR_msa_mini_u_h CODE_FOR_uminv8hi3
+#define CODE_FOR_msa_mini_u_w CODE_FOR_uminv4si3
+#define CODE_FOR_msa_mini_u_d CODE_FOR_uminv2di3
+#define CODE_FOR_msa_mod_s_b CODE_FOR_modv16qi3
+#define CODE_FOR_msa_mod_s_h CODE_FOR_modv8hi3
+#define CODE_FOR_msa_mod_s_w CODE_FOR_modv4si3
+#define CODE_FOR_msa_mod_s_d CODE_FOR_modv2di3
+#define CODE_FOR_msa_mod_u_b CODE_FOR_umodv16qi3
+#define CODE_FOR_msa_mod_u_h CODE_FOR_umodv8hi3
+#define CODE_FOR_msa_mod_u_w CODE_FOR_umodv4si3
+#define CODE_FOR_msa_mod_u_d CODE_FOR_umodv2di3
+#define CODE_FOR_msa_mulv_b CODE_FOR_mulv16qi3
+#define CODE_FOR_msa_mulv_h CODE_FOR_mulv8hi3
+#define CODE_FOR_msa_mulv_w CODE_FOR_mulv4si3
+#define CODE_FOR_msa_mulv_d CODE_FOR_mulv2di3
+#define CODE_FOR_msa_nlzc_b CODE_FOR_clzv16qi2
+#define CODE_FOR_msa_nlzc_h CODE_FOR_clzv8hi2
+#define CODE_FOR_msa_nlzc_w CODE_FOR_clzv4si2
+#define CODE_FOR_msa_nlzc_d CODE_FOR_clzv2di2
+#define CODE_FOR_msa_nor_v CODE_FOR_msa_nor_b
+#define CODE_FOR_msa_or_v CODE_FOR_iorv16qi3
+#define CODE_FOR_msa_ori_b CODE_FOR_iorv16qi3
+#define CODE_FOR_msa_nori_b CODE_FOR_msa_nor_b
+#define CODE_FOR_msa_pcnt_b CODE_FOR_popcountv16qi2
+#define CODE_FOR_msa_pcnt_h CODE_FOR_popcountv8hi2
+#define CODE_FOR_msa_pcnt_w CODE_FOR_popcountv4si2
+#define CODE_FOR_msa_pcnt_d CODE_FOR_popcountv2di2
+#define CODE_FOR_msa_xor_v CODE_FOR_xorv16qi3
+#define CODE_FOR_msa_xori_b CODE_FOR_xorv16qi3
+#define CODE_FOR_msa_sll_b CODE_FOR_vashlv16qi3
+#define CODE_FOR_msa_sll_h CODE_FOR_vashlv8hi3
+#define CODE_FOR_msa_sll_w CODE_FOR_vashlv4si3
+#define CODE_FOR_msa_sll_d CODE_FOR_vashlv2di3
+#define CODE_FOR_msa_slli_b CODE_FOR_vashlv16qi3
+#define CODE_FOR_msa_slli_h CODE_FOR_vashlv8hi3
+#define CODE_FOR_msa_slli_w CODE_FOR_vashlv4si3
+#define CODE_FOR_msa_slli_d CODE_FOR_vashlv2di3
+#define CODE_FOR_msa_sra_b CODE_FOR_vashrv16qi3
+#define CODE_FOR_msa_sra_h CODE_FOR_vashrv8hi3
+#define CODE_FOR_msa_sra_w CODE_FOR_vashrv4si3
+#define CODE_FOR_msa_sra_d CODE_FOR_vashrv2di3
+#define CODE_FOR_msa_srai_b CODE_FOR_vashrv16qi3
+#define CODE_FOR_msa_srai_h CODE_FOR_vashrv8hi3
+#define CODE_FOR_msa_srai_w CODE_FOR_vashrv4si3
+#define CODE_FOR_msa_srai_d CODE_FOR_vashrv2di3
+#define CODE_FOR_msa_srl_b CODE_FOR_vlshrv16qi3
+#define CODE_FOR_msa_srl_h CODE_FOR_vlshrv8hi3
+#define CODE_FOR_msa_srl_w CODE_FOR_vlshrv4si3
+#define CODE_FOR_msa_srl_d CODE_FOR_vlshrv2di3
+#define CODE_FOR_msa_srli_b CODE_FOR_vlshrv16qi3
+#define CODE_FOR_msa_srli_h CODE_FOR_vlshrv8hi3
+#define CODE_FOR_msa_srli_w CODE_FOR_vlshrv4si3
+#define CODE_FOR_msa_srli_d CODE_FOR_vlshrv2di3
+#define CODE_FOR_msa_subv_b CODE_FOR_subv16qi3
+#define CODE_FOR_msa_subv_h CODE_FOR_subv8hi3
+#define CODE_FOR_msa_subv_w CODE_FOR_subv4si3
+#define CODE_FOR_msa_subv_d CODE_FOR_subv2di3
+#define CODE_FOR_msa_subvi_b CODE_FOR_subv16qi3
+#define CODE_FOR_msa_subvi_h CODE_FOR_subv8hi3
+#define CODE_FOR_msa_subvi_w CODE_FOR_subv4si3
+#define CODE_FOR_msa_subvi_d CODE_FOR_subv2di3
+
+#define CODE_FOR_msa_move_v CODE_FOR_movv16qi
+
+#define CODE_FOR_msa_vshf_b CODE_FOR_vec_permv16qi
+#define CODE_FOR_msa_vshf_h CODE_FOR_vec_permv8hi
+#define CODE_FOR_msa_vshf_w CODE_FOR_vec_permv4si
+#define CODE_FOR_msa_vshf_d CODE_FOR_vec_permv2di
+
+#define CODE_FOR_msa_ilvod_d CODE_FOR_msa_ilvl_d
+#define CODE_FOR_msa_ilvev_d CODE_FOR_msa_ilvr_d
+#define CODE_FOR_msa_pckod_d CODE_FOR_msa_ilvl_d
+#define CODE_FOR_msa_pckev_d CODE_FOR_msa_ilvr_d
+
+#define CODE_FOR_msa_ldi_b CODE_FOR_msa_ldiv16qi
+#define CODE_FOR_msa_ldi_h CODE_FOR_msa_ldiv8hi
+#define CODE_FOR_msa_ldi_w CODE_FOR_msa_ldiv4si
+#define CODE_FOR_msa_ldi_d CODE_FOR_msa_ldiv2di
+
static const struct mips_builtin_description mips_builtins[] = {
#define MIPS_GET_FCSR 0
DIRECT_BUILTIN (get_fcsr, MIPS_USI_FTYPE_VOID, hard_float),
@@ -14924,12 +15770,547 @@ static const struct mips_builtin_description mips_builtins[] = {
LOONGSON_BUILTIN_SUFFIX (punpcklwd, s, MIPS_V2SI_FTYPE_V2SI_V2SI),
/* Sundry other built-in functions. */
- DIRECT_NO_TARGET_BUILTIN (cache, MIPS_VOID_FTYPE_SI_CVPOINTER, cache)
+ DIRECT_NO_TARGET_BUILTIN (cache, MIPS_VOID_FTYPE_SI_CVPOINTER, cache),
+
+ /* Built-in functions for MSA. */
+ MSA_BUILTIN (sll_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (sll_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (sll_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (sll_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (slli_b, MIPS_V16QI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (slli_h, MIPS_V8HI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (slli_w, MIPS_V4SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN (slli_d, MIPS_V2DI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (sra_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (sra_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (sra_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (sra_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (srai_b, MIPS_V16QI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (srai_h, MIPS_V8HI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (srai_w, MIPS_V4SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN (srai_d, MIPS_V2DI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (srar_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (srar_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (srar_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (srar_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (srari_b, MIPS_V16QI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (srari_h, MIPS_V8HI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (srari_w, MIPS_V4SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN (srari_d, MIPS_V2DI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (srl_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (srl_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (srl_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (srl_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (srli_b, MIPS_V16QI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (srli_h, MIPS_V8HI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (srli_w, MIPS_V4SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN (srli_d, MIPS_V2DI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (srlr_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (srlr_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (srlr_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (srlr_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (srlri_b, MIPS_V16QI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (srlri_h, MIPS_V8HI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (srlri_w, MIPS_V4SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN (srlri_d, MIPS_V2DI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (bclr_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (bclr_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (bclr_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (bclr_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (bclri_b, MIPS_UV16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (bclri_h, MIPS_UV8HI_FTYPE_UV8HI_UQI),
+ MSA_BUILTIN (bclri_w, MIPS_UV4SI_FTYPE_UV4SI_UQI),
+ MSA_BUILTIN (bclri_d, MIPS_UV2DI_FTYPE_UV2DI_UQI),
+ MSA_BUILTIN (bset_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (bset_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (bset_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (bset_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (bseti_b, MIPS_UV16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (bseti_h, MIPS_UV8HI_FTYPE_UV8HI_UQI),
+ MSA_BUILTIN (bseti_w, MIPS_UV4SI_FTYPE_UV4SI_UQI),
+ MSA_BUILTIN (bseti_d, MIPS_UV2DI_FTYPE_UV2DI_UQI),
+ MSA_BUILTIN (bneg_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (bneg_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (bneg_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (bneg_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (bnegi_b, MIPS_UV16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (bnegi_h, MIPS_UV8HI_FTYPE_UV8HI_UQI),
+ MSA_BUILTIN (bnegi_w, MIPS_UV4SI_FTYPE_UV4SI_UQI),
+ MSA_BUILTIN (bnegi_d, MIPS_UV2DI_FTYPE_UV2DI_UQI),
+ MSA_BUILTIN (binsl_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UV16QI),
+ MSA_BUILTIN (binsl_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI_UV8HI),
+ MSA_BUILTIN (binsl_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI_UV4SI),
+ MSA_BUILTIN (binsl_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI_UV2DI),
+ MSA_BUILTIN (binsli_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UQI),
+ MSA_BUILTIN (binsli_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI_UQI),
+ MSA_BUILTIN (binsli_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI_UQI),
+ MSA_BUILTIN (binsli_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI_UQI),
+ MSA_BUILTIN (binsr_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UV16QI),
+ MSA_BUILTIN (binsr_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI_UV8HI),
+ MSA_BUILTIN (binsr_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI_UV4SI),
+ MSA_BUILTIN (binsr_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI_UV2DI),
+ MSA_BUILTIN (binsri_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UQI),
+ MSA_BUILTIN (binsri_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI_UQI),
+ MSA_BUILTIN (binsri_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI_UQI),
+ MSA_BUILTIN (binsri_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI_UQI),
+ MSA_BUILTIN (addv_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (addv_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (addv_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (addv_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (addvi_b, MIPS_V16QI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (addvi_h, MIPS_V8HI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (addvi_w, MIPS_V4SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN (addvi_d, MIPS_V2DI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (subv_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (subv_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (subv_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (subv_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (subvi_b, MIPS_V16QI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (subvi_h, MIPS_V8HI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (subvi_w, MIPS_V4SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN (subvi_d, MIPS_V2DI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (max_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (max_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (max_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (max_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (maxi_s_b, MIPS_V16QI_FTYPE_V16QI_QI),
+ MSA_BUILTIN (maxi_s_h, MIPS_V8HI_FTYPE_V8HI_QI),
+ MSA_BUILTIN (maxi_s_w, MIPS_V4SI_FTYPE_V4SI_QI),
+ MSA_BUILTIN (maxi_s_d, MIPS_V2DI_FTYPE_V2DI_QI),
+ MSA_BUILTIN (max_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (max_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (max_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (max_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (maxi_u_b, MIPS_UV16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (maxi_u_h, MIPS_UV8HI_FTYPE_UV8HI_UQI),
+ MSA_BUILTIN (maxi_u_w, MIPS_UV4SI_FTYPE_UV4SI_UQI),
+ MSA_BUILTIN (maxi_u_d, MIPS_UV2DI_FTYPE_UV2DI_UQI),
+ MSA_BUILTIN (min_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (min_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (min_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (min_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (mini_s_b, MIPS_V16QI_FTYPE_V16QI_QI),
+ MSA_BUILTIN (mini_s_h, MIPS_V8HI_FTYPE_V8HI_QI),
+ MSA_BUILTIN (mini_s_w, MIPS_V4SI_FTYPE_V4SI_QI),
+ MSA_BUILTIN (mini_s_d, MIPS_V2DI_FTYPE_V2DI_QI),
+ MSA_BUILTIN (min_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (min_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (min_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (min_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (mini_u_b, MIPS_UV16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (mini_u_h, MIPS_UV8HI_FTYPE_UV8HI_UQI),
+ MSA_BUILTIN (mini_u_w, MIPS_UV4SI_FTYPE_UV4SI_UQI),
+ MSA_BUILTIN (mini_u_d, MIPS_UV2DI_FTYPE_UV2DI_UQI),
+ MSA_BUILTIN (max_a_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (max_a_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (max_a_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (max_a_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (min_a_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (min_a_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (min_a_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (min_a_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (ceq_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (ceq_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (ceq_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (ceq_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (ceqi_b, MIPS_V16QI_FTYPE_V16QI_QI),
+ MSA_BUILTIN (ceqi_h, MIPS_V8HI_FTYPE_V8HI_QI),
+ MSA_BUILTIN (ceqi_w, MIPS_V4SI_FTYPE_V4SI_QI),
+ MSA_BUILTIN (ceqi_d, MIPS_V2DI_FTYPE_V2DI_QI),
+ MSA_BUILTIN (clt_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (clt_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (clt_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (clt_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (clti_s_b, MIPS_V16QI_FTYPE_V16QI_QI),
+ MSA_BUILTIN (clti_s_h, MIPS_V8HI_FTYPE_V8HI_QI),
+ MSA_BUILTIN (clti_s_w, MIPS_V4SI_FTYPE_V4SI_QI),
+ MSA_BUILTIN (clti_s_d, MIPS_V2DI_FTYPE_V2DI_QI),
+ MSA_BUILTIN (clt_u_b, MIPS_V16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (clt_u_h, MIPS_V8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (clt_u_w, MIPS_V4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (clt_u_d, MIPS_V2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (clti_u_b, MIPS_V16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (clti_u_h, MIPS_V8HI_FTYPE_UV8HI_UQI),
+ MSA_BUILTIN (clti_u_w, MIPS_V4SI_FTYPE_UV4SI_UQI),
+ MSA_BUILTIN (clti_u_d, MIPS_V2DI_FTYPE_UV2DI_UQI),
+ MSA_BUILTIN (cle_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (cle_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (cle_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (cle_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (clei_s_b, MIPS_V16QI_FTYPE_V16QI_QI),
+ MSA_BUILTIN (clei_s_h, MIPS_V8HI_FTYPE_V8HI_QI),
+ MSA_BUILTIN (clei_s_w, MIPS_V4SI_FTYPE_V4SI_QI),
+ MSA_BUILTIN (clei_s_d, MIPS_V2DI_FTYPE_V2DI_QI),
+ MSA_BUILTIN (cle_u_b, MIPS_V16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (cle_u_h, MIPS_V8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (cle_u_w, MIPS_V4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (cle_u_d, MIPS_V2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (clei_u_b, MIPS_V16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (clei_u_h, MIPS_V8HI_FTYPE_UV8HI_UQI),
+ MSA_BUILTIN (clei_u_w, MIPS_V4SI_FTYPE_UV4SI_UQI),
+ MSA_BUILTIN (clei_u_d, MIPS_V2DI_FTYPE_UV2DI_UQI),
+ MSA_BUILTIN (ld_b, MIPS_V16QI_FTYPE_CVPOINTER_SI),
+ MSA_BUILTIN (ld_h, MIPS_V8HI_FTYPE_CVPOINTER_SI),
+ MSA_BUILTIN (ld_w, MIPS_V4SI_FTYPE_CVPOINTER_SI),
+ MSA_BUILTIN (ld_d, MIPS_V2DI_FTYPE_CVPOINTER_SI),
+ MSA_NO_TARGET_BUILTIN (st_b, MIPS_VOID_FTYPE_V16QI_CVPOINTER_SI),
+ MSA_NO_TARGET_BUILTIN (st_h, MIPS_VOID_FTYPE_V8HI_CVPOINTER_SI),
+ MSA_NO_TARGET_BUILTIN (st_w, MIPS_VOID_FTYPE_V4SI_CVPOINTER_SI),
+ MSA_NO_TARGET_BUILTIN (st_d, MIPS_VOID_FTYPE_V2DI_CVPOINTER_SI),
+ MSA_BUILTIN (sat_s_b, MIPS_V16QI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (sat_s_h, MIPS_V8HI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (sat_s_w, MIPS_V4SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN (sat_s_d, MIPS_V2DI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (sat_u_b, MIPS_UV16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (sat_u_h, MIPS_UV8HI_FTYPE_UV8HI_UQI),
+ MSA_BUILTIN (sat_u_w, MIPS_UV4SI_FTYPE_UV4SI_UQI),
+ MSA_BUILTIN (sat_u_d, MIPS_UV2DI_FTYPE_UV2DI_UQI),
+ MSA_BUILTIN (add_a_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (add_a_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (add_a_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (add_a_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (adds_a_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (adds_a_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (adds_a_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (adds_a_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (adds_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (adds_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (adds_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (adds_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (adds_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (adds_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (adds_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (adds_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (ave_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (ave_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (ave_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (ave_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (ave_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (ave_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (ave_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (ave_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (aver_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (aver_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (aver_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (aver_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (aver_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (aver_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (aver_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (aver_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (subs_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (subs_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (subs_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (subs_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (subs_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (subs_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (subs_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (subs_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (subsuu_s_b, MIPS_V16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (subsuu_s_h, MIPS_V8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (subsuu_s_w, MIPS_V4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (subsuu_s_d, MIPS_V2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (subsus_u_b, MIPS_UV16QI_FTYPE_UV16QI_V16QI),
+ MSA_BUILTIN (subsus_u_h, MIPS_UV8HI_FTYPE_UV8HI_V8HI),
+ MSA_BUILTIN (subsus_u_w, MIPS_UV4SI_FTYPE_UV4SI_V4SI),
+ MSA_BUILTIN (subsus_u_d, MIPS_UV2DI_FTYPE_UV2DI_V2DI),
+ MSA_BUILTIN (asub_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (asub_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (asub_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (asub_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (asub_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (asub_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (asub_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (asub_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (mulv_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (mulv_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (mulv_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (mulv_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (maddv_b, MIPS_V16QI_FTYPE_V16QI_V16QI_V16QI),
+ MSA_BUILTIN (maddv_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI),
+ MSA_BUILTIN (maddv_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI),
+ MSA_BUILTIN (maddv_d, MIPS_V2DI_FTYPE_V2DI_V2DI_V2DI),
+ MSA_BUILTIN (msubv_b, MIPS_V16QI_FTYPE_V16QI_V16QI_V16QI),
+ MSA_BUILTIN (msubv_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI),
+ MSA_BUILTIN (msubv_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI),
+ MSA_BUILTIN (msubv_d, MIPS_V2DI_FTYPE_V2DI_V2DI_V2DI),
+ MSA_BUILTIN (div_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (div_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (div_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (div_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (div_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (div_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (div_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (div_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (hadd_s_h, MIPS_V8HI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (hadd_s_w, MIPS_V4SI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (hadd_s_d, MIPS_V2DI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (hadd_u_h, MIPS_UV8HI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (hadd_u_w, MIPS_UV4SI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (hadd_u_d, MIPS_UV2DI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (hsub_s_h, MIPS_V8HI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (hsub_s_w, MIPS_V4SI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (hsub_s_d, MIPS_V2DI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (hsub_u_h, MIPS_V8HI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (hsub_u_w, MIPS_V4SI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (hsub_u_d, MIPS_V2DI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (mod_s_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (mod_s_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (mod_s_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (mod_s_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (mod_u_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (mod_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (mod_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (mod_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV2DI),
+ MSA_BUILTIN (dotp_s_h, MIPS_V8HI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (dotp_s_w, MIPS_V4SI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (dotp_s_d, MIPS_V2DI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (dotp_u_h, MIPS_UV8HI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (dotp_u_w, MIPS_UV4SI_FTYPE_UV8HI_UV8HI),
+ MSA_BUILTIN (dotp_u_d, MIPS_UV2DI_FTYPE_UV4SI_UV4SI),
+ MSA_BUILTIN (dpadd_s_h, MIPS_V8HI_FTYPE_V8HI_V16QI_V16QI),
+ MSA_BUILTIN (dpadd_s_w, MIPS_V4SI_FTYPE_V4SI_V8HI_V8HI),
+ MSA_BUILTIN (dpadd_s_d, MIPS_V2DI_FTYPE_V2DI_V4SI_V4SI),
+ MSA_BUILTIN (dpadd_u_h, MIPS_UV8HI_FTYPE_UV8HI_UV16QI_UV16QI),
+ MSA_BUILTIN (dpadd_u_w, MIPS_UV4SI_FTYPE_UV4SI_UV8HI_UV8HI),
+ MSA_BUILTIN (dpadd_u_d, MIPS_UV2DI_FTYPE_UV2DI_UV4SI_UV4SI),
+ MSA_BUILTIN (dpsub_s_h, MIPS_V8HI_FTYPE_V8HI_V16QI_V16QI),
+ MSA_BUILTIN (dpsub_s_w, MIPS_V4SI_FTYPE_V4SI_V8HI_V8HI),
+ MSA_BUILTIN (dpsub_s_d, MIPS_V2DI_FTYPE_V2DI_V4SI_V4SI),
+ MSA_BUILTIN (dpsub_u_h, MIPS_V8HI_FTYPE_V8HI_UV16QI_UV16QI),
+ MSA_BUILTIN (dpsub_u_w, MIPS_V4SI_FTYPE_V4SI_UV8HI_UV8HI),
+ MSA_BUILTIN (dpsub_u_d, MIPS_V2DI_FTYPE_V2DI_UV4SI_UV4SI),
+ MSA_BUILTIN (sld_b, MIPS_V16QI_FTYPE_V16QI_V16QI_SI),
+ MSA_BUILTIN (sld_h, MIPS_V8HI_FTYPE_V8HI_V8HI_SI),
+ MSA_BUILTIN (sld_w, MIPS_V4SI_FTYPE_V4SI_V4SI_SI),
+ MSA_BUILTIN (sld_d, MIPS_V2DI_FTYPE_V2DI_V2DI_SI),
+ MSA_BUILTIN (sldi_b, MIPS_V16QI_FTYPE_V16QI_V16QI_UQI),
+ MSA_BUILTIN (sldi_h, MIPS_V8HI_FTYPE_V8HI_V8HI_UQI),
+ MSA_BUILTIN (sldi_w, MIPS_V4SI_FTYPE_V4SI_V4SI_UQI),
+ MSA_BUILTIN (sldi_d, MIPS_V2DI_FTYPE_V2DI_V2DI_UQI),
+ MSA_BUILTIN (splat_b, MIPS_V16QI_FTYPE_V16QI_SI),
+ MSA_BUILTIN (splat_h, MIPS_V8HI_FTYPE_V8HI_SI),
+ MSA_BUILTIN (splat_w, MIPS_V4SI_FTYPE_V4SI_SI),
+ MSA_BUILTIN (splat_d, MIPS_V2DI_FTYPE_V2DI_SI),
+ MSA_BUILTIN (splati_b, MIPS_V16QI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (splati_h, MIPS_V8HI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (splati_w, MIPS_V4SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN (splati_d, MIPS_V2DI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (pckev_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (pckev_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (pckev_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (pckev_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (pckod_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (pckod_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (pckod_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (pckod_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (ilvl_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (ilvl_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (ilvl_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (ilvl_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (ilvr_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (ilvr_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (ilvr_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (ilvr_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (ilvev_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (ilvev_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (ilvev_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (ilvev_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (ilvod_b, MIPS_V16QI_FTYPE_V16QI_V16QI),
+ MSA_BUILTIN (ilvod_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (ilvod_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (ilvod_d, MIPS_V2DI_FTYPE_V2DI_V2DI),
+ MSA_BUILTIN (vshf_b, MIPS_V16QI_FTYPE_V16QI_V16QI_V16QI),
+ MSA_BUILTIN (vshf_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI),
+ MSA_BUILTIN (vshf_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI),
+ MSA_BUILTIN (vshf_d, MIPS_V2DI_FTYPE_V2DI_V2DI_V2DI),
+ MSA_BUILTIN (and_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (andi_b, MIPS_UV16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (or_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (ori_b, MIPS_UV16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (nor_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (nori_b, MIPS_UV16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (xor_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI),
+ MSA_BUILTIN (xori_b, MIPS_UV16QI_FTYPE_UV16QI_UQI),
+ MSA_BUILTIN (bmnz_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UV16QI),
+ MSA_BUILTIN (bmnzi_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UQI),
+ MSA_BUILTIN (bmz_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UV16QI),
+ MSA_BUILTIN (bmzi_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UQI),
+ MSA_BUILTIN (bsel_v, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UV16QI),
+ MSA_BUILTIN (bseli_b, MIPS_UV16QI_FTYPE_UV16QI_UV16QI_UQI),
+ MSA_BUILTIN (shf_b, MIPS_V16QI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (shf_h, MIPS_V8HI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (shf_w, MIPS_V4SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN_TEST_BRANCH (bnz_v, MIPS_SI_FTYPE_UV16QI),
+ MSA_BUILTIN_TEST_BRANCH (bz_v, MIPS_SI_FTYPE_UV16QI),
+ MSA_BUILTIN (fill_b, MIPS_V16QI_FTYPE_SI),
+ MSA_BUILTIN (fill_h, MIPS_V8HI_FTYPE_SI),
+ MSA_BUILTIN (fill_w, MIPS_V4SI_FTYPE_SI),
+ MSA_BUILTIN (fill_d, MIPS_V2DI_FTYPE_DI),
+ MSA_BUILTIN (pcnt_b, MIPS_V16QI_FTYPE_V16QI),
+ MSA_BUILTIN (pcnt_h, MIPS_V8HI_FTYPE_V8HI),
+ MSA_BUILTIN (pcnt_w, MIPS_V4SI_FTYPE_V4SI),
+ MSA_BUILTIN (pcnt_d, MIPS_V2DI_FTYPE_V2DI),
+ MSA_BUILTIN (nloc_b, MIPS_V16QI_FTYPE_V16QI),
+ MSA_BUILTIN (nloc_h, MIPS_V8HI_FTYPE_V8HI),
+ MSA_BUILTIN (nloc_w, MIPS_V4SI_FTYPE_V4SI),
+ MSA_BUILTIN (nloc_d, MIPS_V2DI_FTYPE_V2DI),
+ MSA_BUILTIN (nlzc_b, MIPS_V16QI_FTYPE_V16QI),
+ MSA_BUILTIN (nlzc_h, MIPS_V8HI_FTYPE_V8HI),
+ MSA_BUILTIN (nlzc_w, MIPS_V4SI_FTYPE_V4SI),
+ MSA_BUILTIN (nlzc_d, MIPS_V2DI_FTYPE_V2DI),
+ MSA_BUILTIN (copy_s_b, MIPS_SI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (copy_s_h, MIPS_SI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN (copy_s_w, MIPS_SI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN (copy_s_d, MIPS_DI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (copy_u_b, MIPS_USI_FTYPE_V16QI_UQI),
+ MSA_BUILTIN (copy_u_h, MIPS_USI_FTYPE_V8HI_UQI),
+ MSA_BUILTIN_REMAP (copy_u_w, copy_s_w, MIPS_USI_FTYPE_V4SI_UQI),
+ MSA_BUILTIN_REMAP (copy_u_d, copy_s_d, MIPS_UDI_FTYPE_V2DI_UQI),
+ MSA_BUILTIN (insert_b, MIPS_V16QI_FTYPE_V16QI_UQI_SI),
+ MSA_BUILTIN (insert_h, MIPS_V8HI_FTYPE_V8HI_UQI_SI),
+ MSA_BUILTIN (insert_w, MIPS_V4SI_FTYPE_V4SI_UQI_SI),
+ MSA_BUILTIN (insert_d, MIPS_V2DI_FTYPE_V2DI_UQI_DI),
+ MSA_BUILTIN (insve_b, MIPS_V16QI_FTYPE_V16QI_UQI_V16QI),
+ MSA_BUILTIN (insve_h, MIPS_V8HI_FTYPE_V8HI_UQI_V8HI),
+ MSA_BUILTIN (insve_w, MIPS_V4SI_FTYPE_V4SI_UQI_V4SI),
+ MSA_BUILTIN (insve_d, MIPS_V2DI_FTYPE_V2DI_UQI_V2DI),
+ MSA_BUILTIN_TEST_BRANCH (bnz_b, MIPS_SI_FTYPE_UV16QI),
+ MSA_BUILTIN_TEST_BRANCH (bnz_h, MIPS_SI_FTYPE_UV8HI),
+ MSA_BUILTIN_TEST_BRANCH (bnz_w, MIPS_SI_FTYPE_UV4SI),
+ MSA_BUILTIN_TEST_BRANCH (bnz_d, MIPS_SI_FTYPE_UV2DI),
+ MSA_BUILTIN_TEST_BRANCH (bz_b, MIPS_SI_FTYPE_UV16QI),
+ MSA_BUILTIN_TEST_BRANCH (bz_h, MIPS_SI_FTYPE_UV8HI),
+ MSA_BUILTIN_TEST_BRANCH (bz_w, MIPS_SI_FTYPE_UV4SI),
+ MSA_BUILTIN_TEST_BRANCH (bz_d, MIPS_SI_FTYPE_UV2DI),
+ MSA_BUILTIN (ldi_b, MIPS_V16QI_FTYPE_HI),
+ MSA_BUILTIN (ldi_h, MIPS_V8HI_FTYPE_HI),
+ MSA_BUILTIN (ldi_w, MIPS_V4SI_FTYPE_HI),
+ MSA_BUILTIN (ldi_d, MIPS_V2DI_FTYPE_HI),
+ MSA_BUILTIN (fcaf_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fcaf_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fcor_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fcor_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fcun_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fcun_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fcune_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fcune_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fcueq_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fcueq_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fceq_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fceq_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fcne_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fcne_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fclt_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fclt_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fcult_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fcult_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fcle_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fcle_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fcule_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fcule_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fsaf_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fsaf_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fsor_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fsor_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fsun_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fsun_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fsune_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fsune_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fsueq_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fsueq_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fseq_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fseq_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fsne_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fsne_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fslt_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fslt_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fsult_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fsult_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fsle_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fsle_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fsule_w, MIPS_V4SI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fsule_d, MIPS_V2DI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fadd_w, MIPS_V4SF_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fadd_d, MIPS_V2DF_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fsub_w, MIPS_V4SF_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fsub_d, MIPS_V2DF_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fmul_w, MIPS_V4SF_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fmul_d, MIPS_V2DF_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fdiv_w, MIPS_V4SF_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fdiv_d, MIPS_V2DF_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fmadd_w, MIPS_V4SF_FTYPE_V4SF_V4SF_V4SF),
+ MSA_BUILTIN (fmadd_d, MIPS_V2DF_FTYPE_V2DF_V2DF_V2DF),
+ MSA_BUILTIN (fmsub_w, MIPS_V4SF_FTYPE_V4SF_V4SF_V4SF),
+ MSA_BUILTIN (fmsub_d, MIPS_V2DF_FTYPE_V2DF_V2DF_V2DF),
+ MSA_BUILTIN (fexp2_w, MIPS_V4SF_FTYPE_V4SF_V4SI),
+ MSA_BUILTIN (fexp2_d, MIPS_V2DF_FTYPE_V2DF_V2DI),
+ MSA_BUILTIN (fexdo_h, MIPS_V8HI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fexdo_w, MIPS_V4SF_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (ftq_h, MIPS_V8HI_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (ftq_w, MIPS_V4SI_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fmin_w, MIPS_V4SF_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fmin_d, MIPS_V2DF_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fmin_a_w, MIPS_V4SF_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fmin_a_d, MIPS_V2DF_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fmax_w, MIPS_V4SF_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fmax_d, MIPS_V2DF_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (fmax_a_w, MIPS_V4SF_FTYPE_V4SF_V4SF),
+ MSA_BUILTIN (fmax_a_d, MIPS_V2DF_FTYPE_V2DF_V2DF),
+ MSA_BUILTIN (mul_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (mul_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (mulr_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI),
+ MSA_BUILTIN (mulr_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI),
+ MSA_BUILTIN (madd_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI),
+ MSA_BUILTIN (madd_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI),
+ MSA_BUILTIN (maddr_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI),
+ MSA_BUILTIN (maddr_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI),
+ MSA_BUILTIN (msub_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI),
+ MSA_BUILTIN (msub_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI),
+ MSA_BUILTIN (msubr_q_h, MIPS_V8HI_FTYPE_V8HI_V8HI_V8HI),
+ MSA_BUILTIN (msubr_q_w, MIPS_V4SI_FTYPE_V4SI_V4SI_V4SI),
+ MSA_BUILTIN (fclass_w, MIPS_V4SI_FTYPE_V4SF),
+ MSA_BUILTIN (fclass_d, MIPS_V2DI_FTYPE_V2DF),
+ MSA_BUILTIN (fsqrt_w, MIPS_V4SF_FTYPE_V4SF),
+ MSA_BUILTIN (fsqrt_d, MIPS_V2DF_FTYPE_V2DF),
+ MSA_BUILTIN (frcp_w, MIPS_V4SF_FTYPE_V4SF),
+ MSA_BUILTIN (frcp_d, MIPS_V2DF_FTYPE_V2DF),
+ MSA_BUILTIN (frint_w, MIPS_V4SF_FTYPE_V4SF),
+ MSA_BUILTIN (frint_d, MIPS_V2DF_FTYPE_V2DF),
+ MSA_BUILTIN (frsqrt_w, MIPS_V4SF_FTYPE_V4SF),
+ MSA_BUILTIN (frsqrt_d, MIPS_V2DF_FTYPE_V2DF),
+ MSA_BUILTIN (flog2_w, MIPS_V4SF_FTYPE_V4SF),
+ MSA_BUILTIN (flog2_d, MIPS_V2DF_FTYPE_V2DF),
+ MSA_BUILTIN (fexupl_w, MIPS_V4SF_FTYPE_V8HI),
+ MSA_BUILTIN (fexupl_d, MIPS_V2DF_FTYPE_V4SF),
+ MSA_BUILTIN (fexupr_w, MIPS_V4SF_FTYPE_V8HI),
+ MSA_BUILTIN (fexupr_d, MIPS_V2DF_FTYPE_V4SF),
+ MSA_BUILTIN (ffql_w, MIPS_V4SF_FTYPE_V8HI),
+ MSA_BUILTIN (ffql_d, MIPS_V2DF_FTYPE_V4SI),
+ MSA_BUILTIN (ffqr_w, MIPS_V4SF_FTYPE_V8HI),
+ MSA_BUILTIN (ffqr_d, MIPS_V2DF_FTYPE_V4SI),
+ MSA_BUILTIN (ftint_s_w, MIPS_V4SI_FTYPE_V4SF),
+ MSA_BUILTIN (ftint_s_d, MIPS_V2DI_FTYPE_V2DF),
+ MSA_BUILTIN (ftint_u_w, MIPS_UV4SI_FTYPE_V4SF),
+ MSA_BUILTIN (ftint_u_d, MIPS_UV2DI_FTYPE_V2DF),
+ MSA_BUILTIN (ftrunc_s_w, MIPS_V4SI_FTYPE_V4SF),
+ MSA_BUILTIN (ftrunc_s_d, MIPS_V2DI_FTYPE_V2DF),
+ MSA_BUILTIN (ftrunc_u_w, MIPS_UV4SI_FTYPE_V4SF),
+ MSA_BUILTIN (ftrunc_u_d, MIPS_UV2DI_FTYPE_V2DF),
+ MSA_BUILTIN (ffint_s_w, MIPS_V4SF_FTYPE_V4SI),
+ MSA_BUILTIN (ffint_s_d, MIPS_V2DF_FTYPE_V2DI),
+ MSA_BUILTIN (ffint_u_w, MIPS_V4SF_FTYPE_UV4SI),
+ MSA_BUILTIN (ffint_u_d, MIPS_V2DF_FTYPE_UV2DI),
+ MSA_NO_TARGET_BUILTIN (ctcmsa, MIPS_VOID_FTYPE_UQI_SI),
+ MSA_BUILTIN (cfcmsa, MIPS_SI_FTYPE_UQI),
+ MSA_BUILTIN (move_v, MIPS_V16QI_FTYPE_V16QI),
};
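
A usage sketch for the table above (assumptions: compiled with -mmsa, and the typedef stands in for the one in msa.h): each MSA_BUILTIN entry becomes a __builtin_msa_* function whose C signature follows its MIPS_*_FTYPE_* name, so the addv_w entry is callable as:

  typedef int v4i32 __attribute__ ((vector_size (16)));  /* assumed typedef */

  v4i32
  add_v4i32 (v4i32 a, v4i32 b)
  {
    /* addv.w: element-wise 32-bit addition, per MIPS_V4SI_FTYPE_V4SI_V4SI.  */
    return __builtin_msa_addv_w (a, b);
  }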
/* Index I is the function declaration for mips_builtins[I], or null if the
function isn't defined on this target. */
static GTY(()) tree mips_builtin_decls[ARRAY_SIZE (mips_builtins)];
+/* Map an instruction code to the index I of its function declaration in
+ mips_builtin_decls, or 0 if the built-in isn't defined for this target. */
+static GTY(()) int mips_get_builtin_decl_index[NUM_INSN_CODES];
/* MODE is a vector mode whose elements have type TYPE. Return the type
of the vector itself. */
@@ -14971,7 +16352,9 @@ mips_build_cvpointer_type (void)
#define MIPS_ATYPE_CVPOINTER mips_build_cvpointer_type ()
/* Standard mode-based argument types. */
+#define MIPS_ATYPE_QI intQI_type_node
#define MIPS_ATYPE_UQI unsigned_intQI_type_node
+#define MIPS_ATYPE_HI intHI_type_node
#define MIPS_ATYPE_SI intSI_type_node
#define MIPS_ATYPE_USI unsigned_intSI_type_node
#define MIPS_ATYPE_DI intDI_type_node
@@ -14986,6 +16369,24 @@ mips_build_cvpointer_type (void)
#define MIPS_ATYPE_V4QI mips_builtin_vector_type (intQI_type_node, V4QImode)
#define MIPS_ATYPE_V4HI mips_builtin_vector_type (intHI_type_node, V4HImode)
#define MIPS_ATYPE_V8QI mips_builtin_vector_type (intQI_type_node, V8QImode)
+
+#define MIPS_ATYPE_V2DI \
+ mips_builtin_vector_type (long_long_integer_type_node, V2DImode)
+#define MIPS_ATYPE_V4SI mips_builtin_vector_type (intSI_type_node, V4SImode)
+#define MIPS_ATYPE_V8HI mips_builtin_vector_type (intHI_type_node, V8HImode)
+#define MIPS_ATYPE_V16QI mips_builtin_vector_type (intQI_type_node, V16QImode)
+#define MIPS_ATYPE_V2DF mips_builtin_vector_type (double_type_node, V2DFmode)
+#define MIPS_ATYPE_V4SF mips_builtin_vector_type (float_type_node, V4SFmode)
+
+#define MIPS_ATYPE_UV2DI \
+ mips_builtin_vector_type (long_long_unsigned_type_node, V2DImode)
+#define MIPS_ATYPE_UV4SI \
+ mips_builtin_vector_type (unsigned_intSI_type_node, V4SImode)
+#define MIPS_ATYPE_UV8HI \
+ mips_builtin_vector_type (unsigned_intHI_type_node, V8HImode)
+#define MIPS_ATYPE_UV16QI \
+ mips_builtin_vector_type (unsigned_intQI_type_node, V16QImode)
+
#define MIPS_ATYPE_UV2SI \
mips_builtin_vector_type (unsigned_intSI_type_node, V2SImode)
#define MIPS_ATYPE_UV4HI \
@@ -15047,10 +16448,13 @@ mips_init_builtins (void)
{
d = &mips_builtins[i];
if (d->avail ())
- mips_builtin_decls[i]
- = add_builtin_function (d->name,
- mips_build_function_type (d->function_type),
- i, BUILT_IN_MD, NULL, NULL);
+ {
+ mips_builtin_decls[i]
+ = add_builtin_function (d->name,
+ mips_build_function_type (d->function_type),
+ i, BUILT_IN_MD, NULL, NULL);
+ mips_get_builtin_decl_index[d->icode] = i;
+ }
}
}
@@ -15064,6 +16468,48 @@ mips_builtin_decl (unsigned int code, bool initialize_p ATTRIBUTE_UNUSED)
return mips_builtin_decls[code];
}
+/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION. */
+
+static tree
+mips_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in)
+{
+ machine_mode in_mode, out_mode;
+ int in_n, out_n;
+
+ if (TREE_CODE (type_out) != VECTOR_TYPE
+ || TREE_CODE (type_in) != VECTOR_TYPE
+ || !ISA_HAS_MSA)
+ return NULL_TREE;
+
+ out_mode = TYPE_MODE (TREE_TYPE (type_out));
+ out_n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+ /* INSN is the name of the associated instruction pattern, without
+ the leading CODE_FOR_. */
+#define MIPS_GET_BUILTIN(INSN) \
+ mips_builtin_decls[mips_get_builtin_decl_index[CODE_FOR_##INSN]]
+
+ switch (fn)
+ {
+ case BUILT_IN_SQRT:
+ if (out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return MIPS_GET_BUILTIN (msa_fsqrt_d);
+ break;
+ case BUILT_IN_SQRTF:
+ if (out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return MIPS_GET_BUILTIN (msa_fsqrt_w);
+ break;
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
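A minimal sketch of what this hook enables (assumptions: -mmsa -mfp64 -mhard-float, plus -fno-math-errno so the sqrt call is vectorizable):

  #include <math.h>

  double a[256], b[256];

  void
  vec_sqrt (void)
  {
    /* The vectorizer asks the hook for a V2DF sqrt and receives the
       msa_fsqrt_d built-in, so this loop can use fsqrt.d.  */
    for (int i = 0; i < 256; i++)
      a[i] = sqrt (b[i]);
  }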
/* Take argument ARGNO from EXP's argument list and convert it into
an expand operand. Store the operand in *OP. */
@@ -15090,6 +16536,211 @@ static rtx
mips_expand_builtin_insn (enum insn_code icode, unsigned int nops,
struct expand_operand *ops, bool has_target_p)
{
+ machine_mode imode;
+
+ switch (icode)
+ {
+ case CODE_FOR_msa_addvi_b:
+ case CODE_FOR_msa_addvi_h:
+ case CODE_FOR_msa_addvi_w:
+ case CODE_FOR_msa_addvi_d:
+ case CODE_FOR_msa_clti_u_b:
+ case CODE_FOR_msa_clti_u_h:
+ case CODE_FOR_msa_clti_u_w:
+ case CODE_FOR_msa_clti_u_d:
+ case CODE_FOR_msa_clei_u_b:
+ case CODE_FOR_msa_clei_u_h:
+ case CODE_FOR_msa_clei_u_w:
+ case CODE_FOR_msa_clei_u_d:
+ case CODE_FOR_msa_maxi_u_b:
+ case CODE_FOR_msa_maxi_u_h:
+ case CODE_FOR_msa_maxi_u_w:
+ case CODE_FOR_msa_maxi_u_d:
+ case CODE_FOR_msa_mini_u_b:
+ case CODE_FOR_msa_mini_u_h:
+ case CODE_FOR_msa_mini_u_w:
+ case CODE_FOR_msa_mini_u_d:
+ case CODE_FOR_msa_subvi_b:
+ case CODE_FOR_msa_subvi_h:
+ case CODE_FOR_msa_subvi_w:
+ case CODE_FOR_msa_subvi_d:
+ gcc_assert (has_target_p && nops == 3);
+ /* We only generate a vector of constants if the second argument
+ is an immediate. We also validate the range of the immediate. */
+ if (!CONST_INT_P (ops[2].value)
+ || !IN_RANGE (INTVAL (ops[2].value), 0, 31))
+ break;
+ ops[2].mode = ops[0].mode;
+ ops[2].value = mips_gen_const_int_vector (ops[2].mode,
+ INTVAL (ops[2].value));
+ break;
+
+ case CODE_FOR_msa_ceqi_b:
+ case CODE_FOR_msa_ceqi_h:
+ case CODE_FOR_msa_ceqi_w:
+ case CODE_FOR_msa_ceqi_d:
+ case CODE_FOR_msa_clti_s_b:
+ case CODE_FOR_msa_clti_s_h:
+ case CODE_FOR_msa_clti_s_w:
+ case CODE_FOR_msa_clti_s_d:
+ case CODE_FOR_msa_clei_s_b:
+ case CODE_FOR_msa_clei_s_h:
+ case CODE_FOR_msa_clei_s_w:
+ case CODE_FOR_msa_clei_s_d:
+ case CODE_FOR_msa_maxi_s_b:
+ case CODE_FOR_msa_maxi_s_h:
+ case CODE_FOR_msa_maxi_s_w:
+ case CODE_FOR_msa_maxi_s_d:
+ case CODE_FOR_msa_mini_s_b:
+ case CODE_FOR_msa_mini_s_h:
+ case CODE_FOR_msa_mini_s_w:
+ case CODE_FOR_msa_mini_s_d:
+ gcc_assert (has_target_p && nops == 3);
+ /* We only generate a vector of constants if the second argument
+ is an immediate. We also validate the range of the immediate. */
+ if (!CONST_INT_P (ops[2].value)
+ || !IN_RANGE (INTVAL (ops[2].value), -16, 15))
+ break;
+ ops[2].mode = ops[0].mode;
+ ops[2].value = mips_gen_const_int_vector (ops[2].mode,
+ INTVAL (ops[2].value));
+ break;
+
+ case CODE_FOR_msa_andi_b:
+ case CODE_FOR_msa_ori_b:
+ case CODE_FOR_msa_nori_b:
+ case CODE_FOR_msa_xori_b:
+ gcc_assert (has_target_p && nops == 3);
+ if (!CONST_INT_P (ops[2].value))
+ break;
+ ops[2].mode = ops[0].mode;
+ ops[2].value = mips_gen_const_int_vector (ops[2].mode,
+ INTVAL (ops[2].value));
+ break;
+
+ case CODE_FOR_msa_bmzi_b:
+ case CODE_FOR_msa_bmnzi_b:
+ case CODE_FOR_msa_bseli_b:
+ gcc_assert (has_target_p && nops == 4);
+ if (!CONST_INT_P (ops[3].value))
+ break;
+ ops[3].mode = ops[0].mode;
+ ops[3].value = mips_gen_const_int_vector (ops[3].mode,
+ INTVAL (ops[3].value));
+ break;
+
+ case CODE_FOR_msa_fill_b:
+ case CODE_FOR_msa_fill_h:
+ case CODE_FOR_msa_fill_w:
+ case CODE_FOR_msa_fill_d:
+ /* Map the built-ins to vector fill operations. We need to fix up
+ the mode of the element being inserted. */
+ gcc_assert (has_target_p && nops == 2);
+ imode = GET_MODE_INNER (ops[0].mode);
+ ops[1].value = lowpart_subreg (imode, ops[1].value, ops[1].mode);
+ ops[1].mode = imode;
+ break;
+
+ case CODE_FOR_msa_ilvl_b:
+ case CODE_FOR_msa_ilvl_h:
+ case CODE_FOR_msa_ilvl_w:
+ case CODE_FOR_msa_ilvl_d:
+ case CODE_FOR_msa_ilvr_b:
+ case CODE_FOR_msa_ilvr_h:
+ case CODE_FOR_msa_ilvr_w:
+ case CODE_FOR_msa_ilvr_d:
+ case CODE_FOR_msa_ilvev_b:
+ case CODE_FOR_msa_ilvev_h:
+ case CODE_FOR_msa_ilvev_w:
+ case CODE_FOR_msa_ilvod_b:
+ case CODE_FOR_msa_ilvod_h:
+ case CODE_FOR_msa_ilvod_w:
+ case CODE_FOR_msa_pckev_b:
+ case CODE_FOR_msa_pckev_h:
+ case CODE_FOR_msa_pckev_w:
+ case CODE_FOR_msa_pckod_b:
+ case CODE_FOR_msa_pckod_h:
+ case CODE_FOR_msa_pckod_w:
+ /* Swap operands 1 and 2 for interleave operations. The built-ins
+ follow the ISA convention, which has op1 as the higher component
+ and op2 as the lower component. However, the VEC_PERM op in tree
+ and vec_concat in RTL expect the first operand to be the lower
+ component, so the swap is needed for the built-ins. */
+ gcc_assert (has_target_p && nops == 3);
+ std::swap (ops[1], ops[2]);
+ break;
+
+ case CODE_FOR_msa_slli_b:
+ case CODE_FOR_msa_slli_h:
+ case CODE_FOR_msa_slli_w:
+ case CODE_FOR_msa_slli_d:
+ case CODE_FOR_msa_srai_b:
+ case CODE_FOR_msa_srai_h:
+ case CODE_FOR_msa_srai_w:
+ case CODE_FOR_msa_srai_d:
+ case CODE_FOR_msa_srli_b:
+ case CODE_FOR_msa_srli_h:
+ case CODE_FOR_msa_srli_w:
+ case CODE_FOR_msa_srli_d:
+ gcc_assert (has_target_p && nops == 3);
+ if (!CONST_INT_P (ops[2].value)
+ || !IN_RANGE (INTVAL (ops[2].value), 0,
+ GET_MODE_UNIT_PRECISION (ops[0].mode) - 1))
+ break;
+ ops[2].mode = ops[0].mode;
+ ops[2].value = mips_gen_const_int_vector (ops[2].mode,
+ INTVAL (ops[2].value));
+ break;
+
+ case CODE_FOR_msa_insert_b:
+ case CODE_FOR_msa_insert_h:
+ case CODE_FOR_msa_insert_w:
+ case CODE_FOR_msa_insert_d:
+ /* Map the built-ins to insert operations. We need to swap operands,
+ fix up the mode for the element being inserted, and generate
+ a bit mask for vec_merge. */
+ gcc_assert (has_target_p && nops == 4);
+ std::swap (ops[1], ops[2]);
+ std::swap (ops[1], ops[3]);
+ imode = GET_MODE_INNER (ops[0].mode);
+ ops[1].value = lowpart_subreg (imode, ops[1].value, ops[1].mode);
+ ops[1].mode = imode;
+ ops[3].value = GEN_INT (1 << INTVAL (ops[3].value));
+ break;
+
+ case CODE_FOR_msa_insve_b:
+ case CODE_FOR_msa_insve_h:
+ case CODE_FOR_msa_insve_w:
+ case CODE_FOR_msa_insve_d:
+ /* Map the built-ins to element insert operations. We need to swap
+ operands and generate a bit mask. */
+ gcc_assert (has_target_p && nops == 4);
+ std::swap (ops[1], ops[2]);
+ std::swap (ops[1], ops[3]);
+ ops[3].value = GEN_INT (1 << INTVAL (ops[3].value));
+ break;
+
+ case CODE_FOR_msa_shf_b:
+ case CODE_FOR_msa_shf_h:
+ case CODE_FOR_msa_shf_w:
+ case CODE_FOR_msa_shf_w_f:
+ gcc_assert (has_target_p && nops == 3);
+ ops[2].value = mips_gen_const_int_vector_shuffle (ops[0].mode,
+ INTVAL (ops[2].value));
+ break;
+
+ case CODE_FOR_msa_vshf_b:
+ case CODE_FOR_msa_vshf_h:
+ case CODE_FOR_msa_vshf_w:
+ case CODE_FOR_msa_vshf_d:
+ gcc_assert (has_target_p && nops == 4);
+ std::swap (ops[1], ops[3]);
+ break;
+
+ default:
+ break;
+ }
+
if (!maybe_expand_insn (icode, nops, ops))
{
error ("invalid argument to built-in function");
@@ -15182,6 +16833,50 @@ mips_expand_builtin_movtf (enum mips_builtin_type type,
4, ops, true);
}
+/* Expand an MSA built-in for the compare-and-branch instruction specified
+ by ICODE, setting a general-purpose register to 1 if the branch was taken
+ and to 0 otherwise. */
+
+static rtx
+mips_expand_builtin_msa_test_branch (enum insn_code icode, tree exp)
+{
+ struct expand_operand ops[3];
+ rtx_insn *cbranch;
+ rtx_code_label *true_label, *done_label;
+ rtx cmp_result;
+
+ true_label = gen_label_rtx ();
+ done_label = gen_label_rtx ();
+
+ create_input_operand (&ops[0], true_label, TYPE_MODE (TREE_TYPE (exp)));
+ mips_prepare_builtin_arg (&ops[1], exp, 0);
+ create_fixed_operand (&ops[2], const0_rtx);
+
+ /* Make sure that operand 1 is a REG. */
+ if (GET_CODE (ops[1].value) != REG)
+ ops[1].value = force_reg (ops[1].mode, ops[1].value);
+
+ if ((cbranch = maybe_gen_insn (icode, 3, ops)) == NULL_RTX)
+ error ("failed to expand built-in function");
+
+ cmp_result = gen_reg_rtx (SImode);
+
+ /* First assume that CMP_RESULT is false. */
+ mips_emit_move (cmp_result, const0_rtx);
+
+ /* Branch to TRUE_LABEL if CBRANCH is taken and DONE_LABEL otherwise. */
+ emit_jump_insn (cbranch);
+ emit_jump_insn (gen_jump (done_label));
+ emit_barrier ();
+
+ /* Set CMP_RESULT to true if the branch was taken. */
+ emit_label (true_label);
+ mips_emit_move (cmp_result, const1_rtx);
+
+ emit_label (done_label);
+ return cmp_result;
+}
+
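A caller's-eye sketch of the test-branch built-ins expanded here (the typedef is an assumption standing in for the msa.h one):

  typedef unsigned char v16u8 __attribute__ ((vector_size (16)));

  int
  any_set (v16u8 v)
  {
    /* Expands to the sequence above: returns 1 if any bit of V is
       non-zero (the branch is taken), 0 otherwise.  */
    return __builtin_msa_bnz_v (v);
  }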
/* Move VALUE_IF_TRUE into TARGET if CONDITION is true; move VALUE_IF_FALSE
into TARGET otherwise. Return TARGET. */
@@ -15318,6 +17013,9 @@ mips_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return mips_expand_builtin_compare (d->builtin_type, d->icode,
d->cond, target, exp);
+ case MIPS_BUILTIN_MSA_TEST_BRANCH:
+ return mips_expand_builtin_msa_test_branch (d->icode, exp);
+
case MIPS_BUILTIN_BPOSGE32:
return mips_expand_builtin_bposge (d->builtin_type, target);
}
@@ -17592,6 +19290,9 @@ mips_set_compression_mode (unsigned int compression_mode)
if (TARGET_HARD_FLOAT_ABI && !TARGET_OLDABI)
sorry ("hard-float MIPS16 code for ABIs other than o32 and o64");
+
+ if (TARGET_MSA)
+ sorry ("MSA MIPS16 code");
}
else
{
@@ -17768,6 +19469,11 @@ mips_option_override (void)
if (TARGET_MICROMIPS && TARGET_MIPS16)
error ("unsupported combination: %s", "-mips16 -mmicromips");
+ /* Prohibit the Paired-Single and MSA combination. This is a software
+ restriction rather than an architectural one. */
+ if (ISA_HAS_MSA && TARGET_PAIRED_SINGLE_FLOAT)
+ error ("unsupported combination: %s", "-mmsa -mpaired-single");
+
/* Save the base compression state and process flags as though we
were generating uncompressed code. */
mips_base_compression_flags = TARGET_COMPRESSION;
@@ -17871,6 +19577,8 @@ mips_option_override (void)
target_flags |= MASK_FLOAT64;
else if (TARGET_64BIT && TARGET_DOUBLE_FLOAT)
target_flags |= MASK_FLOAT64;
+ else if (mips_abi == ABI_32 && ISA_HAS_MSA && !TARGET_FLOATXX)
+ target_flags |= MASK_FLOAT64;
else
target_flags &= ~MASK_FLOAT64;
}
@@ -18129,6 +19837,11 @@ mips_option_override (void)
TARGET_MIPS3D = 0;
}
+ /* Make sure that when ISA_HAS_MSA is true, TARGET_FLOAT64 and
+ TARGET_HARD_FLOAT_ABI are both true. */
+ if (ISA_HAS_MSA && !(TARGET_FLOAT64 && TARGET_HARD_FLOAT_ABI))
+ error ("%<-mmsa%> must be used with %<-mfp64%> and %<-mhard-float%>");
+
/* Make sure that -mpaired-single is only used on ISAs that support it.
We must disable it otherwise since it relies on other ISA properties
like ISA_HAS_8CC having their normal values. */
@@ -19164,7 +20877,7 @@ mips_prepare_pch_save (void)
/* Generate or test for an insn that supports a constant permutation. */
-#define MAX_VECT_LEN 8
+#define MAX_VECT_LEN 16
struct expand_vec_perm_d
{
@@ -19368,6 +21081,41 @@ mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d)
return true;
}
+/* Construct (set target (vec_select op0 (parallel selector))) and
+ return true if that's a valid instruction in the active ISA. */
+
+static bool
+mips_expand_msa_shuffle (struct expand_vec_perm_d *d)
+{
+ rtx x, elts[MAX_VECT_LEN];
+ rtvec v;
+ rtx_insn *insn;
+ unsigned i;
+
+ if (!ISA_HAS_MSA)
+ return false;
+
+ for (i = 0; i < d->nelt; i++)
+ elts[i] = GEN_INT (d->perm[i]);
+
+ v = gen_rtvec_v (d->nelt, elts);
+ x = gen_rtx_PARALLEL (VOIDmode, v);
+
+ if (!mips_const_vector_shuffle_set_p (x, d->vmode))
+ return false;
+
+ x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x);
+ x = gen_rtx_SET (d->target, x);
+
+ insn = emit_insn (x);
+ if (recog_memoized (insn) < 0)
+ {
+ remove_insn (insn);
+ return false;
+ }
+ return true;
+}
+
static bool
mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
@@ -19402,6 +21150,8 @@ mips_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
if (mips_expand_vpc_loongson_bcast (d))
return true;
+ if (mips_expand_msa_shuffle (d))
+ return true;
return false;
}
@@ -19480,6 +21230,17 @@ mips_expand_vec_perm_const (rtx operands[4])
return ok;
}
+/* Implement TARGET_SCHED_REASSOCIATION_WIDTH. */
+
+static int
+mips_sched_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
+ machine_mode mode)
+{
+ if (MSA_SUPPORTED_MODE_P (mode))
+ return 2;
+ return 1;
+}
+
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
static bool
@@ -19530,9 +21291,62 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
machine_mode imode = GET_MODE (operands[1]);
rtx (*unpack) (rtx, rtx, rtx);
- rtx (*cmpgt) (rtx, rtx, rtx);
+ rtx (*cmpFunc) (rtx, rtx, rtx);
rtx tmp, dest, zero;
+ if (ISA_HAS_MSA)
+ {
+ switch (imode)
+ {
+ case V4SImode:
+ if (BYTES_BIG_ENDIAN != high_p)
+ unpack = gen_msa_ilvl_w;
+ else
+ unpack = gen_msa_ilvr_w;
+
+ cmpFunc = gen_msa_clt_s_w;
+ break;
+
+ case V8HImode:
+ if (BYTES_BIG_ENDIAN != high_p)
+ unpack = gen_msa_ilvl_h;
+ else
+ unpack = gen_msa_ilvr_h;
+
+ cmpFunc = gen_msa_clt_s_h;
+ break;
+
+ case V16QImode:
+ if (BYTES_BIG_ENDIAN != high_p)
+ unpack = gen_msa_ilvl_b;
+ else
+ unpack = gen_msa_ilvr_b;
+
+ cmpFunc = gen_msa_clt_s_b;
+ break;
+
+ default:
+ gcc_unreachable ();
+ break;
+ }
+
+ if (!unsigned_p)
+ {
+ /* Extract the sign extension of each element by comparing it
+ with immediate zero. */
+ tmp = gen_reg_rtx (imode);
+ emit_insn (cmpFunc (tmp, operands[1], CONST0_RTX (imode)));
+ }
+ else
+ tmp = force_reg (imode, CONST0_RTX (imode));
+
+ dest = gen_reg_rtx (imode);
+
+ emit_insn (unpack (dest, operands[1], tmp));
+ emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest));
+ return;
+ }
+
switch (imode)
{
case V8QImode:
@@ -19540,14 +21354,14 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
unpack = gen_loongson_punpckhbh;
else
unpack = gen_loongson_punpcklbh;
- cmpgt = gen_loongson_pcmpgtb;
+ cmpFunc = gen_loongson_pcmpgtb;
break;
case V4HImode:
if (high_p)
unpack = gen_loongson_punpckhhw;
else
unpack = gen_loongson_punpcklhw;
- cmpgt = gen_loongson_pcmpgth;
+ cmpFunc = gen_loongson_pcmpgth;
break;
default:
gcc_unreachable ();
@@ -19559,7 +21373,7 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
else
{
tmp = gen_reg_rtx (imode);
- emit_insn (cmpgt (tmp, zero, operands[1]));
+ emit_insn (cmpFunc (tmp, zero, operands[1]));
}
dest = gen_reg_rtx (imode);
@@ -19568,6 +21382,28 @@ mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest));
}
+/* Construct and return a PARALLEL RTX of CONST_INTs selecting the HIGH
+ (high_p == TRUE) or LOW (high_p == FALSE) half of a vector of mode
+ MODE. */
+
+rtx
+mips_msa_vec_parallel_const_half (machine_mode mode, bool high_p)
+{
+ int nunits = GET_MODE_NUNITS (mode);
+ rtvec v = rtvec_alloc (nunits / 2);
+ int base;
+ int i;
+
+ if (BYTES_BIG_ENDIAN)
+ base = high_p ? 0 : nunits / 2;
+ else
+ base = high_p ? nunits / 2 : 0;
+
+ for (i = 0; i < nunits / 2; i++)
+ RTVEC_ELT (v, i) = GEN_INT (base + i);
+
+ return gen_rtx_PARALLEL (VOIDmode, v);
+}
+
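A worked example of the helper above: for V8HImode on a little-endian target, high_p == true gives base = 4, so the result is

  (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])

while on a big-endian target the same call selects elements 0..3.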
/* A subroutine of mips_expand_vec_init, match constant vector elements. */
static inline bool
@@ -19615,6 +21451,42 @@ mips_expand_vi_broadcast (machine_mode vmode, rtx target, rtx elt)
gcc_assert (ok);
}
+/* Return a CONST_VECTOR of mode MODE with every element equal to VAL. */
+
+rtx
+mips_gen_const_int_vector (machine_mode mode, int val)
+{
+ int nunits = GET_MODE_NUNITS (mode);
+ rtvec v = rtvec_alloc (nunits);
+ int i;
+
+ for (i = 0; i < nunits; i++)
+ RTVEC_ELT (v, i) = gen_int_mode (val, GET_MODE_INNER (mode));
+
+ return gen_rtx_CONST_VECTOR (mode, v);
+}
+
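For example, mips_gen_const_int_vector (V8HImode, -1) returns a V8HImode CONST_VECTOR whose eight elements are each (const_int -1), every element being produced through gen_int_mode for HImode.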
+/* Return a PARALLEL of repeated 4-element selector sets decoded from
+ immediate VAL for mode MODE. */
+
+static rtx
+mips_gen_const_int_vector_shuffle (machine_mode mode, int val)
+{
+ int nunits = GET_MODE_NUNITS (mode);
+ int nsets = nunits / 4;
+ rtx elts[MAX_VECT_LEN];
+ int set = 0;
+ int i, j;
+
+ /* Generate const_ints replicating the same 4-element selector set
+ decoded from the immediate. */
+ for (j = 0; j < nsets; j++, set = 4 * j)
+ for (i = 0; i < 4; i++)
+ elts[set + i] = GEN_INT (set + ((val >> (2 * i)) & 0x3));
+
+ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nunits, elts));
+}
+
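A worked decoding of the function above: each 2-bit field of VAL selects an element within every aligned 4-element set, so val = 0x1b (binary 00 01 10 11) reverses each set. For V4SImode the result is

  (parallel [(const_int 3) (const_int 2) (const_int 1) (const_int 0)])

and for V16QImode the same pattern repeats at offsets 0, 4, 8 and 12.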
/* A subroutine of mips_expand_vec_init, replacing all of the non-constant
elements of VALS with zeros, copy the constant vector to TARGET. */
@@ -19627,8 +21499,9 @@ mips_expand_vi_constant (machine_mode vmode, unsigned nelt,
for (i = 0; i < nelt; ++i)
{
- if (!mips_constant_elt_p (RTVEC_ELT (vec, i)))
- RTVEC_ELT (vec, i) = const0_rtx;
+ rtx elem = RTVEC_ELT (vec, i);
+ if (!mips_constant_elt_p (elem))
+ RTVEC_ELT (vec, i) = CONST0_RTX (GET_MODE (elem));
}
emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec));
@@ -19689,6 +21562,106 @@ mips_expand_vector_init (rtx target, rtx vals)
all_same = false;
}
+ if (ISA_HAS_MSA)
+ {
+ if (all_same)
+ {
+ rtx same = XVECEXP (vals, 0, 0);
+ rtx temp, temp2;
+
+ if (CONST_INT_P (same) && nvar == 0
+ && mips_signed_immediate_p (INTVAL (same), 10, 0))
+ {
+ switch (vmode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ emit_move_insn (target, same);
+ return;
+
+ default:
+ break;
+ }
+ }
+ temp = gen_reg_rtx (imode);
+ if (imode == GET_MODE (same))
+ temp2 = same;
+ else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
+ temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0);
+ else
+ temp2 = lowpart_subreg (imode, same, GET_MODE (same));
+ emit_move_insn (temp, temp2);
+
+ switch (vmode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ mips_emit_move (target, gen_rtx_VEC_DUPLICATE (vmode, temp));
+ break;
+
+ case V4SFmode:
+ emit_insn (gen_msa_splati_w_f_scalar (target, temp));
+ break;
+
+ case V2DFmode:
+ emit_insn (gen_msa_splati_d_f_scalar (target, temp));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ rtvec vec = shallow_copy_rtvec (XVEC (vals, 0));
+
+ for (i = 0; i < nelt; ++i)
+ RTVEC_ELT (vec, i) = CONST0_RTX (imode);
+
+ emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, vec));
+
+ for (i = 0; i < nelt; ++i)
+ {
+ rtx temp = gen_reg_rtx (imode);
+ emit_move_insn (temp, XVECEXP (vals, 0, i));
+ switch (vmode)
+ {
+ case V16QImode:
+ emit_insn (gen_vec_setv16qi (target, temp, GEN_INT (i)));
+ break;
+
+ case V8HImode:
+ emit_insn (gen_vec_setv8hi (target, temp, GEN_INT (i)));
+ break;
+
+ case V4SImode:
+ emit_insn (gen_vec_setv4si (target, temp, GEN_INT (i)));
+ break;
+
+ case V2DImode:
+ emit_insn (gen_vec_setv2di (target, temp, GEN_INT (i)));
+ break;
+
+ case V4SFmode:
+ emit_insn (gen_vec_setv4sf (target, temp, GEN_INT (i)));
+ break;
+
+ case V2DFmode:
+ emit_insn (gen_vec_setv2df (target, temp, GEN_INT (i)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+ return;
+ }
+
/* Load constants from the pool, or whatever's handy. */
if (nvar == 0)
{
@@ -19839,6 +21812,169 @@ mips_hard_regno_caller_save_mode (unsigned int regno,
return mode;
}
+/* Generate RTL for comparing OP0 and OP1 using condition COND, storing
+ -1 in DEST for elements where the comparison is true and 0 elsewhere. */
+
+static void
+mips_expand_msa_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
+{
+ machine_mode cmp_mode = GET_MODE (op0);
+ int unspec = -1;
+ bool negate = false;
+
+ switch (cmp_mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ switch (cond)
+ {
+ case NE:
+ cond = reverse_condition (cond);
+ negate = true;
+ break;
+ case EQ:
+ case LT:
+ case LE:
+ case LTU:
+ case LEU:
+ break;
+ case GE:
+ case GT:
+ case GEU:
+ case GTU:
+ std::swap (op0, op1);
+ cond = swap_condition (cond);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ mips_emit_binary (cond, dest, op0, op1);
+ if (negate)
+ emit_move_insn (dest, gen_rtx_NOT (GET_MODE (dest), dest));
+ break;
+
+ case V4SFmode:
+ case V2DFmode:
+ switch (cond)
+ {
+ case UNORDERED:
+ case ORDERED:
+ case EQ:
+ case NE:
+ case UNEQ:
+ case UNLE:
+ case UNLT:
+ break;
+ case LTGT: cond = NE; break;
+ case UNGE: cond = UNLE; std::swap (op0, op1); break;
+ case UNGT: cond = UNLT; std::swap (op0, op1); break;
+ case LE: unspec = UNSPEC_MSA_FSLE; break;
+ case LT: unspec = UNSPEC_MSA_FSLT; break;
+ case GE: unspec = UNSPEC_MSA_FSLE; std::swap (op0, op1); break;
+ case GT: unspec = UNSPEC_MSA_FSLT; std::swap (op0, op1); break;
+ default:
+ gcc_unreachable ();
+ }
+ if (unspec < 0)
+ mips_emit_binary (cond, dest, op0, op1);
+ else
+ {
+ rtx x = gen_rtx_UNSPEC (GET_MODE (dest),
+ gen_rtvec (2, op0, op1), unspec);
+ emit_insn (gen_rtx_SET (dest, x));
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ break;
+ }
+}
+
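As a worked identity for the floating-point mapping above: a signaling GE is emitted as FSLE with the operands swapped, since a >= b holds exactly when b <= a; GT likewise becomes FSLT with swapped operands.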
+/* Expand VEC_COND_EXPR, where:
+ MODE is the mode of the result,
+ VIMODE is the equivalent integer mode,
+ OPERANDS are the operands of the VEC_COND_EXPR. */
+
+void
+mips_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
+ rtx *operands)
+{
+ rtx cond = operands[3];
+ rtx cmp_op0 = operands[4];
+ rtx cmp_op1 = operands[5];
+ rtx cmp_res = gen_reg_rtx (vimode);
+
+ mips_expand_msa_cmp (cmp_res, GET_CODE (cond), cmp_op0, cmp_op1);
+
+ /* We handle the following cases:
+ 1) r = a CMP b ? -1 : 0
+ 2) r = a CMP b ? -1 : v
+ 3) r = a CMP b ? v : 0
+ 4) r = a CMP b ? v1 : v2 */
+
+ /* Case (1) above. We only move the results. */
+ if (operands[1] == CONSTM1_RTX (vimode)
+ && operands[2] == CONST0_RTX (vimode))
+ emit_move_insn (operands[0], cmp_res);
+ else
+ {
+ rtx src1 = gen_reg_rtx (vimode);
+ rtx src2 = gen_reg_rtx (vimode);
+ rtx mask = gen_reg_rtx (vimode);
+ rtx bsel;
+
+ /* Move the vector result to use it as a mask. */
+ emit_move_insn (mask, cmp_res);
+
+ if (register_operand (operands[1], mode))
+ {
+ rtx xop1 = operands[1];
+ if (mode != vimode)
+ {
+ xop1 = gen_reg_rtx (vimode);
+ emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
+ }
+ emit_move_insn (src1, xop1);
+ }
+ else
+ {
+ gcc_assert (operands[1] == CONSTM1_RTX (vimode));
+ /* Case (2), if the code below doesn't also move the mask to src2. */
+ emit_move_insn (src1, mask);
+ }
+
+ if (register_operand (operands[2], mode))
+ {
+ rtx xop2 = operands[2];
+ if (mode != vimode)
+ {
+ xop2 = gen_reg_rtx (vimode);
+ emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
+ }
+ emit_move_insn (src2, xop2);
+ }
+ else
+ {
+ gcc_assert (operands[2] == CONST0_RTX (mode));
+ /* Case (3), if the code above didn't move the mask to src1. */
+ emit_move_insn (src2, mask);
+ }
+
+ /* This is case (4) if the mask wasn't moved to either src1 or src2.
+ In all cases we finish with a vector mask-based select. */
+ bsel = gen_rtx_IOR (vimode,
+ gen_rtx_AND (vimode,
+ gen_rtx_NOT (vimode, mask), src2),
+ gen_rtx_AND (vimode, mask, src1));
+ /* Write the selected result back into the mask register. */
+ emit_insn (gen_rtx_SET (mask, bsel));
+ emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
+ }
+}
+
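The closing mask-based copy is the usual bitwise select; as a sketch of the RTL built above:

  dest = (mask & src1) | (~mask & src2);

so an all-ones mask element picks src1 and an all-zeros element picks src2, covering cases (2)-(4) uniformly.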
/* Implement TARGET_CASE_VALUES_THRESHOLD. */
unsigned int
@@ -20120,6 +22256,9 @@ mips_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
#undef TARGET_MODE_REP_EXTENDED
#define TARGET_MODE_REP_EXTENDED mips_mode_rep_extended
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+ mips_builtin_vectorized_function
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P mips_vector_mode_supported_p
@@ -20128,6 +22267,9 @@ mips_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE mips_preferred_simd_mode
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
+ mips_autovectorize_vector_sizes
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS mips_init_builtins
@@ -20205,6 +22347,9 @@ mips_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK mips_vectorize_vec_perm_const_ok
+#undef TARGET_SCHED_REASSOCIATION_WIDTH
+#define TARGET_SCHED_REASSOCIATION_WIDTH mips_sched_reassociation_width
+
#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD mips_case_values_threshold
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 803ab98e760..1efa61a6ede 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -472,6 +472,12 @@ struct mips_cpu_info {
builtin_define ("__mips_dsp_rev=1"); \
} \
\
+ if (ISA_HAS_MSA) \
+ { \
+ builtin_define ("__mips_msa"); \
+ builtin_define ("__mips_msa_width=128"); \
+ } \
+ \
MIPS_CPP_SET_PROCESSOR ("_MIPS_ARCH", mips_arch_info); \
MIPS_CPP_SET_PROCESSOR ("_MIPS_TUNE", mips_tune_info); \
\
@@ -824,7 +830,8 @@ struct mips_cpu_info {
--with-fpu is ignored if -msoft-float, -msingle-float or -mdouble-float are
specified.
--with-nan is ignored if -mnan is specified.
- --with-fp-32 is ignored if -msoft-float, -msingle-float or -mfp are specified.
+ --with-fp-32 is ignored if -msoft-float, -msingle-float, -mmsa or -mfp are
+ specified.
--with-odd-spreg-32 is ignored if -msoft-float, -msingle-float, -modd-spreg
or -mno-odd-spreg are specified.
--with-divide is ignored if -mdivide-traps or -mdivide-breaks are
@@ -841,7 +848,7 @@ struct mips_cpu_info {
{"fpu", "%{!msoft-float:%{!msingle-float:%{!mdouble-float:-m%(VALUE)-float}}}" }, \
{"nan", "%{!mnan=*:-mnan=%(VALUE)}" }, \
{"fp_32", "%{" OPT_ARCH32 \
- ":%{!msoft-float:%{!msingle-float:%{!mfp*:-mfp%(VALUE)}}}}" }, \
+ ":%{!msoft-float:%{!msingle-float:%{!mfp*:%{!mmsa:-mfp%(VALUE)}}}}}" }, \
{"odd_spreg_32", "%{" OPT_ARCH32 ":%{!msoft-float:%{!msingle-float:" \
"%{!modd-spreg:%{!mno-odd-spreg:-m%(VALUE)}}}}}" }, \
{"divide", "%{!mdivide-traps:%{!mdivide-breaks:-mdivide-%(VALUE)}}" }, \
@@ -1175,6 +1182,9 @@ struct mips_cpu_info {
/* Revision 2 of the DSP ASE is available. */
#define ISA_HAS_DSPR2 (TARGET_DSPR2 && !TARGET_MIPS16)
+/* The MSA ASE is available. */
+#define ISA_HAS_MSA (TARGET_MSA && !TARGET_MIPS16)
+
/* True if the result of a load is not available to the next instruction.
A nop will then be needed between instructions like "lw $4,..."
and "addiu $4,$4,1". */
@@ -1316,6 +1326,7 @@ struct mips_cpu_info {
%{meva} %{mno-eva} \
%{mvirt} %{mno-virt} \
%{mxpa} %{mno-xpa} \
+%{mmsa} %{mno-msa} \
%{msmartmips} %{mno-smartmips} \
%{mmt} %{mno-mt} \
%{mfix-rm7000} %{mno-fix-rm7000} \
@@ -1487,6 +1498,11 @@ FP_ASM_SPEC "\
#define MIN_UNITS_PER_WORD 4
#endif
+/* Width of an MSA vector register in bytes. */
+#define UNITS_PER_MSA_REG 16
+/* Width of an MSA vector register in bits. */
+#define BITS_PER_MSA_REG (UNITS_PER_MSA_REG * BITS_PER_UNIT)
+
/* For MIPS, width of a floating point register. */
#define UNITS_PER_FPREG (TARGET_FLOAT64 ? 8 : 4)
@@ -1559,8 +1575,11 @@ FP_ASM_SPEC "\
/* 8 is observed right on a DECstation and on riscos 4.02. */
#define STRUCTURE_SIZE_BOUNDARY 8
-/* There is no point aligning anything to a rounder boundary than this. */
-#define BIGGEST_ALIGNMENT LONG_DOUBLE_TYPE_SIZE
+/* There is no point aligning anything to a rounder boundary than
+ LONG_DOUBLE_TYPE_SIZE, except that under MSA the biggest alignment
+ is BITS_PER_MSA_REG. */
+#define BIGGEST_ALIGNMENT \
+ (ISA_HAS_MSA ? BITS_PER_MSA_REG : LONG_DOUBLE_TYPE_SIZE)
/* All accesses must be aligned. */
#define STRICT_ALIGNMENT 1
@@ -1667,7 +1686,7 @@ FP_ASM_SPEC "\
/* The [d]clz instructions have the natural values at 0. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
- ((VALUE) = GET_MODE_BITSIZE (MODE), 2)
+ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
/* Standard register usage. */
@@ -1798,6 +1817,10 @@ FP_ASM_SPEC "\
#define MD_REG_NUM (MD_REG_LAST - MD_REG_FIRST + 1)
#define MD_DBX_FIRST (FP_DBX_FIRST + FP_REG_NUM)
+#define MSA_REG_FIRST FP_REG_FIRST
+#define MSA_REG_LAST FP_REG_LAST
+#define MSA_REG_NUM FP_REG_NUM
+
/* The DWARF 2 CFA column which tracks the return address from a
signal handler context. This means that to maintain backwards
compatibility, no hard register can be assigned this column if it
@@ -1886,8 +1909,11 @@ FP_ASM_SPEC "\
/* Test if REGNO is hi, lo, or one of the 6 new DSP accumulators. */
#define ACC_REG_P(REGNO) \
(MD_REG_P (REGNO) || DSP_ACC_REG_P (REGNO))
+#define MSA_REG_P(REGNO) \
+ ((unsigned int) ((int) (REGNO) - MSA_REG_FIRST) < MSA_REG_NUM)
#define FP_REG_RTX_P(X) (REG_P (X) && FP_REG_P (REGNO (X)))
+#define MSA_REG_RTX_P(X) (REG_P (X) && MSA_REG_P (REGNO (X)))
/* True if X is (const (unspec [(const_int 0)] UNSPEC_GP)). This is used
to initialize the mips16 gp pseudo register. */
@@ -1916,10 +1942,12 @@ FP_ASM_SPEC "\
mips_hard_regno_caller_save_mode (REGNO, NREGS, MODE)
/* Odd-numbered single-precision registers are not considered callee-saved
- for o32 FPXX as they will be clobbered when run on an FR=1 FPU. */
+ for o32 FPXX as they will be clobbered when run on an FR=1 FPU.
+ MSA vector registers with MODE wider than 64 bits are partially clobbered too. */
#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE) \
- (TARGET_FLOATXX && hard_regno_nregs[REGNO][MODE] == 1 \
- && FP_REG_P (REGNO) && ((REGNO) & 1))
+ ((TARGET_FLOATXX && hard_regno_nregs[REGNO][MODE] == 1 \
+ && FP_REG_P (REGNO) && ((REGNO) & 1)) \
+ || (ISA_HAS_MSA && FP_REG_P (REGNO) && GET_MODE_SIZE (MODE) > 8))
#define MODES_TIEABLE_P mips_modes_tieable_p
@@ -2381,6 +2409,13 @@ enum reg_class
#define FP_ARG_FIRST (FP_REG_FIRST + 12)
#define FP_ARG_LAST (FP_ARG_FIRST + MAX_ARGS_IN_REGISTERS - 1)
+/* True if MODE is a vector mode supported in an MSA vector register. */
+#define MSA_SUPPORTED_MODE_P(MODE) \
+ (ISA_HAS_MSA \
+ && GET_MODE_SIZE (MODE) == UNITS_PER_MSA_REG \
+ && (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \
+ || GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT))
+
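For orientation, the only modes that pass MSA_SUPPORTED_MODE_P are the
six 128-bit vector modes (V16QI, V8HI, V4SI, V2DI, V4SF and V2DF). A
minimal standalone C sketch of the same predicate, using hypothetical
stand-ins for GCC's mode queries (not part of the patch):

    #include <stdbool.h>

    enum mode_class { MODE_VECTOR_INT, MODE_VECTOR_FLOAT, MODE_OTHER };

    /* Mirrors MSA_SUPPORTED_MODE_P: MSA enabled, exactly one vector
       register wide (16 bytes), and an integer or float vector class.  */
    static bool
    msa_supported_mode_p (bool isa_has_msa, unsigned size_in_bytes,
                          enum mode_class mclass)
    {
      return isa_has_msa
             && size_in_bytes == 16 /* UNITS_PER_MSA_REG */
             && (mclass == MODE_VECTOR_INT || mclass == MODE_VECTOR_FLOAT);
    }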
/* Temporary register that is used when restoring $gp after a call. $4 and $5
are used for returning complex double values in soft-float code, so $6 is the
first suitable candidate for TARGET_MIPS16. For !TARGET_MIPS16 we can use
@@ -2606,6 +2641,7 @@ typedef struct mips_args {
we generally don't want to use them for copying arbitrary data.
A single N-word move is usually the same cost as N single-word moves. */
#define MOVE_MAX UNITS_PER_WORD
+/* We don't modify MAX_MOVE_MAX for MSA, as it is only used by classic reload. */
#define MAX_MOVE_MAX 8
/* Define this macro as a C expression which is nonzero if
@@ -2767,7 +2803,39 @@ typedef struct mips_args {
{ "gp", 28 + GP_REG_FIRST }, \
{ "sp", 29 + GP_REG_FIRST }, \
{ "fp", 30 + GP_REG_FIRST }, \
- { "ra", 31 + GP_REG_FIRST } \
+ { "ra", 31 + GP_REG_FIRST }, \
+ { "$w0", 0 + FP_REG_FIRST }, \
+ { "$w1", 1 + FP_REG_FIRST }, \
+ { "$w2", 2 + FP_REG_FIRST }, \
+ { "$w3", 3 + FP_REG_FIRST }, \
+ { "$w4", 4 + FP_REG_FIRST }, \
+ { "$w5", 5 + FP_REG_FIRST }, \
+ { "$w6", 6 + FP_REG_FIRST }, \
+ { "$w7", 7 + FP_REG_FIRST }, \
+ { "$w8", 8 + FP_REG_FIRST }, \
+ { "$w9", 9 + FP_REG_FIRST }, \
+ { "$w10", 10 + FP_REG_FIRST }, \
+ { "$w11", 11 + FP_REG_FIRST }, \
+ { "$w12", 12 + FP_REG_FIRST }, \
+ { "$w13", 13 + FP_REG_FIRST }, \
+ { "$w14", 14 + FP_REG_FIRST }, \
+ { "$w15", 15 + FP_REG_FIRST }, \
+ { "$w16", 16 + FP_REG_FIRST }, \
+ { "$w17", 17 + FP_REG_FIRST }, \
+ { "$w18", 18 + FP_REG_FIRST }, \
+ { "$w19", 19 + FP_REG_FIRST }, \
+ { "$w20", 20 + FP_REG_FIRST }, \
+ { "$w21", 21 + FP_REG_FIRST }, \
+ { "$w22", 22 + FP_REG_FIRST }, \
+ { "$w23", 23 + FP_REG_FIRST }, \
+ { "$w24", 24 + FP_REG_FIRST }, \
+ { "$w25", 25 + FP_REG_FIRST }, \
+ { "$w26", 26 + FP_REG_FIRST }, \
+ { "$w27", 27 + FP_REG_FIRST }, \
+ { "$w28", 28 + FP_REG_FIRST }, \
+ { "$w29", 29 + FP_REG_FIRST }, \
+ { "$w30", 30 + FP_REG_FIRST }, \
+ { "$w31", 31 + FP_REG_FIRST } \
}
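Because the $w aliases above map straight onto the FP register file,
GCC's explicit register variable extension can name an MSA register
directly once this table is in place. A hedged sketch (assumes -mmsa;
the variable name and register choice are illustrative):

    typedef int v4i32 __attribute__ ((vector_size (16)));

    /* Global register variable pinned to MSA register $w8, which
       overlays $f8 per the alias table above.  */
    register v4i32 msa_acc asm ("$w8");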
#define DBR_OUTPUT_SEQEND(STREAM) \
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 188308aae83..d8d564fabd9 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -225,11 +225,12 @@
shift_shift"
(const_string "unknown"))
-(define_attr "alu_type" "unknown,add,sub,not,nor,and,or,xor"
+(define_attr "alu_type" "unknown,add,sub,not,nor,and,or,xor,simd_add"
(const_string "unknown"))
;; Main data type used by the insn
-(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF,FPSW"
+(define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,SF,DF,TF,FPSW,
+ V2DI,V4SI,V8HI,V16QI,V2DF,V4SF"
(const_string "unknown"))
;; True if the main data type is twice the size of a word.
@@ -243,6 +244,13 @@
(const_string "yes")]
(const_string "no")))
+;; True if the main data type is four times the size of a word.
+(define_attr "qword_mode" "no,yes"
+ (cond [(and (eq_attr "mode" "TI,TF")
+ (not (match_test "TARGET_64BIT")))
+ (const_string "yes")]
+ (const_string "no")))
+
;; Attributes describing a sync loop. These loops have the form:
;;
;; if (RELEASE_BARRIER == YES) sync
@@ -365,7 +373,12 @@
shift,slt,signext,clz,pop,trap,imul,imul3,imul3nc,imadd,idiv,idiv3,move,
fmove,fadd,fmul,fmadd,fdiv,frdiv,frdiv1,frdiv2,fabs,fneg,fcmp,fcvt,fsqrt,
frsqrt,frsqrt1,frsqrt2,dspmac,dspmacsat,accext,accmod,dspalu,dspalusat,
- multi,atomic,syncloop,nop,ghost,multimem"
+ multi,atomic,syncloop,nop,ghost,multimem,
+ simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd,
+ simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp,
+ simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill,
+ simd_permute,simd_shf,simd_sat,simd_pcnt,simd_copy,simd_branch,simd_cmsa,
+ simd_fminmax,simd_logic,simd_move,simd_load,simd_store"
(cond [(eq_attr "jal" "!unset") (const_string "call")
(eq_attr "got" "load") (const_string "load")
@@ -400,6 +413,11 @@
(eq_attr "move_type" "constN,shift_shift")
(const_string "multi")
+ ;; These types of move are split for quadword modes only.
+ (and (eq_attr "move_type" "move,const")
+ (eq_attr "qword_mode" "yes"))
+ (const_string "multi")
+
;; These types of move are split for doubleword modes only.
(and (eq_attr "move_type" "move,const")
(eq_attr "dword_mode" "yes"))
@@ -486,6 +504,12 @@
(eq_attr "dword_mode" "yes"))
(const_int 2)
+ ;; Check for quadword moves that are decomposed into four
+ ;; instructions.
+ (and (eq_attr "move_type" "mtc,mfc,move")
+ (eq_attr "qword_mode" "yes"))
+ (const_int 4)
+
;; Constants, loads and stores are handled by external routines.
(and (eq_attr "move_type" "const,constN")
(eq_attr "dword_mode" "yes"))
@@ -527,7 +551,7 @@
(const_int 2)
(eq_attr "type" "idiv,idiv3")
- (symbol_ref "mips_idiv_insns ()")
+ (symbol_ref "mips_idiv_insns (GET_MODE (PATTERN (insn)))")
(not (eq_attr "sync_mem" "none"))
(symbol_ref "mips_sync_loop_insns (insn, operands)")]
@@ -884,8 +908,10 @@
(define_mode_attr fmt [(SF "s") (DF "d") (V2SF "ps")])
;; This attribute gives the upper-case mode name for one unit of a
-;; floating-point mode.
-(define_mode_attr UNITMODE [(SF "SF") (DF "DF") (V2SF "SF")])
+;; floating-point mode or vector mode.
+(define_mode_attr UNITMODE [(SF "SF") (DF "DF") (V2SF "SF") (V4SF "SF")
+ (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
+ (V2DF "DF")])
;; This attribute gives the integer mode that has the same size as a
;; fixed-point mode.
@@ -941,6 +967,10 @@
;; from the same template.
(define_code_iterator any_mod [mod umod])
+;; This code iterator allows addition and subtraction to be generated
+;; from the same template.
+(define_code_iterator addsub [plus minus])
+
;; This code iterator allows all native floating-point comparisons to be
;; generated from the same template.
(define_code_iterator fcond [unordered uneq unlt unle eq lt le
@@ -7634,6 +7664,9 @@
; ST-Microelectronics Loongson-2E/2F-specific patterns.
(include "loongson.md")
+; MIPS MSA instruction patterns.
+(include "mips-msa.md")
+
(define_c_enum "unspec" [
UNSPEC_ADDRESS_FIRST
])
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index ebd67e4bdb9..08dd83e14ce 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -299,6 +299,10 @@ mmicromips
Target Report Mask(MICROMIPS)
Use microMIPS instructions.
+mmsa
+Target Report Var(TARGET_MSA)
+Use MIPS MSA Extension instructions.
+
mmt
Target Report Var(TARGET_MT)
Allow the use of MT instructions.
diff --git a/gcc/config/mips/msa.h b/gcc/config/mips/msa.h
new file mode 100644
index 00000000000..341eb7f81d1
--- /dev/null
+++ b/gcc/config/mips/msa.h
@@ -0,0 +1,582 @@
+/* MIPS MSA intrinsics include file.
+
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ Contributed by Imagination Technologies Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _MSA_H
+#define _MSA_H 1
+
+#if defined(__mips_msa)
+typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16)));
+typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1)));
+typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16)));
+typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1)));
+typedef short v8i16 __attribute__ ((vector_size(16), aligned(16)));
+typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2)));
+typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16)));
+typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2)));
+typedef int v4i32 __attribute__ ((vector_size(16), aligned(16)));
+typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4)));
+typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16)));
+typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4)));
+typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16)));
+typedef long long v2i64_d __attribute__ ((vector_size(16), aligned(8)));
+typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16)));
+typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8)));
+typedef float v4f32 __attribute__ ((vector_size(16), aligned(16)));
+typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4)));
+typedef double v2f64 __attribute__ ((vector_size(16), aligned(16)));
+typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8)));
+
+#define __msa_sll_b __builtin_msa_sll_b
+#define __msa_sll_h __builtin_msa_sll_h
+#define __msa_sll_w __builtin_msa_sll_w
+#define __msa_sll_d __builtin_msa_sll_d
+#define __msa_slli_b __builtin_msa_slli_b
+#define __msa_slli_h __builtin_msa_slli_h
+#define __msa_slli_w __builtin_msa_slli_w
+#define __msa_slli_d __builtin_msa_slli_d
+#define __msa_sra_b __builtin_msa_sra_b
+#define __msa_sra_h __builtin_msa_sra_h
+#define __msa_sra_w __builtin_msa_sra_w
+#define __msa_sra_d __builtin_msa_sra_d
+#define __msa_srai_b __builtin_msa_srai_b
+#define __msa_srai_h __builtin_msa_srai_h
+#define __msa_srai_w __builtin_msa_srai_w
+#define __msa_srai_d __builtin_msa_srai_d
+#define __msa_srar_b __builtin_msa_srar_b
+#define __msa_srar_h __builtin_msa_srar_h
+#define __msa_srar_w __builtin_msa_srar_w
+#define __msa_srar_d __builtin_msa_srar_d
+#define __msa_srari_b __builtin_msa_srari_b
+#define __msa_srari_h __builtin_msa_srari_h
+#define __msa_srari_w __builtin_msa_srari_w
+#define __msa_srari_d __builtin_msa_srari_d
+#define __msa_srl_b __builtin_msa_srl_b
+#define __msa_srl_h __builtin_msa_srl_h
+#define __msa_srl_w __builtin_msa_srl_w
+#define __msa_srl_d __builtin_msa_srl_d
+#define __msa_srli_b __builtin_msa_srli_b
+#define __msa_srli_h __builtin_msa_srli_h
+#define __msa_srli_w __builtin_msa_srli_w
+#define __msa_srli_d __builtin_msa_srli_d
+#define __msa_srlr_b __builtin_msa_srlr_b
+#define __msa_srlr_h __builtin_msa_srlr_h
+#define __msa_srlr_w __builtin_msa_srlr_w
+#define __msa_srlr_d __builtin_msa_srlr_d
+#define __msa_srlri_b __builtin_msa_srlri_b
+#define __msa_srlri_h __builtin_msa_srlri_h
+#define __msa_srlri_w __builtin_msa_srlri_w
+#define __msa_srlri_d __builtin_msa_srlri_d
+#define __msa_bclr_b __builtin_msa_bclr_b
+#define __msa_bclr_h __builtin_msa_bclr_h
+#define __msa_bclr_w __builtin_msa_bclr_w
+#define __msa_bclr_d __builtin_msa_bclr_d
+#define __msa_bclri_b __builtin_msa_bclri_b
+#define __msa_bclri_h __builtin_msa_bclri_h
+#define __msa_bclri_w __builtin_msa_bclri_w
+#define __msa_bclri_d __builtin_msa_bclri_d
+#define __msa_bset_b __builtin_msa_bset_b
+#define __msa_bset_h __builtin_msa_bset_h
+#define __msa_bset_w __builtin_msa_bset_w
+#define __msa_bset_d __builtin_msa_bset_d
+#define __msa_bseti_b __builtin_msa_bseti_b
+#define __msa_bseti_h __builtin_msa_bseti_h
+#define __msa_bseti_w __builtin_msa_bseti_w
+#define __msa_bseti_d __builtin_msa_bseti_d
+#define __msa_bneg_b __builtin_msa_bneg_b
+#define __msa_bneg_h __builtin_msa_bneg_h
+#define __msa_bneg_w __builtin_msa_bneg_w
+#define __msa_bneg_d __builtin_msa_bneg_d
+#define __msa_bnegi_b __builtin_msa_bnegi_b
+#define __msa_bnegi_h __builtin_msa_bnegi_h
+#define __msa_bnegi_w __builtin_msa_bnegi_w
+#define __msa_bnegi_d __builtin_msa_bnegi_d
+#define __msa_binsl_b __builtin_msa_binsl_b
+#define __msa_binsl_h __builtin_msa_binsl_h
+#define __msa_binsl_w __builtin_msa_binsl_w
+#define __msa_binsl_d __builtin_msa_binsl_d
+#define __msa_binsli_b __builtin_msa_binsli_b
+#define __msa_binsli_h __builtin_msa_binsli_h
+#define __msa_binsli_w __builtin_msa_binsli_w
+#define __msa_binsli_d __builtin_msa_binsli_d
+#define __msa_binsr_b __builtin_msa_binsr_b
+#define __msa_binsr_h __builtin_msa_binsr_h
+#define __msa_binsr_w __builtin_msa_binsr_w
+#define __msa_binsr_d __builtin_msa_binsr_d
+#define __msa_binsri_b __builtin_msa_binsri_b
+#define __msa_binsri_h __builtin_msa_binsri_h
+#define __msa_binsri_w __builtin_msa_binsri_w
+#define __msa_binsri_d __builtin_msa_binsri_d
+#define __msa_addv_b __builtin_msa_addv_b
+#define __msa_addv_h __builtin_msa_addv_h
+#define __msa_addv_w __builtin_msa_addv_w
+#define __msa_addv_d __builtin_msa_addv_d
+#define __msa_addvi_b __builtin_msa_addvi_b
+#define __msa_addvi_h __builtin_msa_addvi_h
+#define __msa_addvi_w __builtin_msa_addvi_w
+#define __msa_addvi_d __builtin_msa_addvi_d
+#define __msa_subv_b __builtin_msa_subv_b
+#define __msa_subv_h __builtin_msa_subv_h
+#define __msa_subv_w __builtin_msa_subv_w
+#define __msa_subv_d __builtin_msa_subv_d
+#define __msa_subvi_b __builtin_msa_subvi_b
+#define __msa_subvi_h __builtin_msa_subvi_h
+#define __msa_subvi_w __builtin_msa_subvi_w
+#define __msa_subvi_d __builtin_msa_subvi_d
+#define __msa_max_s_b __builtin_msa_max_s_b
+#define __msa_max_s_h __builtin_msa_max_s_h
+#define __msa_max_s_w __builtin_msa_max_s_w
+#define __msa_max_s_d __builtin_msa_max_s_d
+#define __msa_maxi_s_b __builtin_msa_maxi_s_b
+#define __msa_maxi_s_h __builtin_msa_maxi_s_h
+#define __msa_maxi_s_w __builtin_msa_maxi_s_w
+#define __msa_maxi_s_d __builtin_msa_maxi_s_d
+#define __msa_max_u_b __builtin_msa_max_u_b
+#define __msa_max_u_h __builtin_msa_max_u_h
+#define __msa_max_u_w __builtin_msa_max_u_w
+#define __msa_max_u_d __builtin_msa_max_u_d
+#define __msa_maxi_u_b __builtin_msa_maxi_u_b
+#define __msa_maxi_u_h __builtin_msa_maxi_u_h
+#define __msa_maxi_u_w __builtin_msa_maxi_u_w
+#define __msa_maxi_u_d __builtin_msa_maxi_u_d
+#define __msa_min_s_b __builtin_msa_min_s_b
+#define __msa_min_s_h __builtin_msa_min_s_h
+#define __msa_min_s_w __builtin_msa_min_s_w
+#define __msa_min_s_d __builtin_msa_min_s_d
+#define __msa_mini_s_b __builtin_msa_mini_s_b
+#define __msa_mini_s_h __builtin_msa_mini_s_h
+#define __msa_mini_s_w __builtin_msa_mini_s_w
+#define __msa_mini_s_d __builtin_msa_mini_s_d
+#define __msa_min_u_b __builtin_msa_min_u_b
+#define __msa_min_u_h __builtin_msa_min_u_h
+#define __msa_min_u_w __builtin_msa_min_u_w
+#define __msa_min_u_d __builtin_msa_min_u_d
+#define __msa_mini_u_b __builtin_msa_mini_u_b
+#define __msa_mini_u_h __builtin_msa_mini_u_h
+#define __msa_mini_u_w __builtin_msa_mini_u_w
+#define __msa_mini_u_d __builtin_msa_mini_u_d
+#define __msa_max_a_b __builtin_msa_max_a_b
+#define __msa_max_a_h __builtin_msa_max_a_h
+#define __msa_max_a_w __builtin_msa_max_a_w
+#define __msa_max_a_d __builtin_msa_max_a_d
+#define __msa_min_a_b __builtin_msa_min_a_b
+#define __msa_min_a_h __builtin_msa_min_a_h
+#define __msa_min_a_w __builtin_msa_min_a_w
+#define __msa_min_a_d __builtin_msa_min_a_d
+#define __msa_ceq_b __builtin_msa_ceq_b
+#define __msa_ceq_h __builtin_msa_ceq_h
+#define __msa_ceq_w __builtin_msa_ceq_w
+#define __msa_ceq_d __builtin_msa_ceq_d
+#define __msa_ceqi_b __builtin_msa_ceqi_b
+#define __msa_ceqi_h __builtin_msa_ceqi_h
+#define __msa_ceqi_w __builtin_msa_ceqi_w
+#define __msa_ceqi_d __builtin_msa_ceqi_d
+#define __msa_clt_s_b __builtin_msa_clt_s_b
+#define __msa_clt_s_h __builtin_msa_clt_s_h
+#define __msa_clt_s_w __builtin_msa_clt_s_w
+#define __msa_clt_s_d __builtin_msa_clt_s_d
+#define __msa_clti_s_b __builtin_msa_clti_s_b
+#define __msa_clti_s_h __builtin_msa_clti_s_h
+#define __msa_clti_s_w __builtin_msa_clti_s_w
+#define __msa_clti_s_d __builtin_msa_clti_s_d
+#define __msa_clt_u_b __builtin_msa_clt_u_b
+#define __msa_clt_u_h __builtin_msa_clt_u_h
+#define __msa_clt_u_w __builtin_msa_clt_u_w
+#define __msa_clt_u_d __builtin_msa_clt_u_d
+#define __msa_clti_u_b __builtin_msa_clti_u_b
+#define __msa_clti_u_h __builtin_msa_clti_u_h
+#define __msa_clti_u_w __builtin_msa_clti_u_w
+#define __msa_clti_u_d __builtin_msa_clti_u_d
+#define __msa_cle_s_b __builtin_msa_cle_s_b
+#define __msa_cle_s_h __builtin_msa_cle_s_h
+#define __msa_cle_s_w __builtin_msa_cle_s_w
+#define __msa_cle_s_d __builtin_msa_cle_s_d
+#define __msa_clei_s_b __builtin_msa_clei_s_b
+#define __msa_clei_s_h __builtin_msa_clei_s_h
+#define __msa_clei_s_w __builtin_msa_clei_s_w
+#define __msa_clei_s_d __builtin_msa_clei_s_d
+#define __msa_cle_u_b __builtin_msa_cle_u_b
+#define __msa_cle_u_h __builtin_msa_cle_u_h
+#define __msa_cle_u_w __builtin_msa_cle_u_w
+#define __msa_cle_u_d __builtin_msa_cle_u_d
+#define __msa_clei_u_b __builtin_msa_clei_u_b
+#define __msa_clei_u_h __builtin_msa_clei_u_h
+#define __msa_clei_u_w __builtin_msa_clei_u_w
+#define __msa_clei_u_d __builtin_msa_clei_u_d
+#define __msa_ld_b __builtin_msa_ld_b
+#define __msa_ld_h __builtin_msa_ld_h
+#define __msa_ld_w __builtin_msa_ld_w
+#define __msa_ld_d __builtin_msa_ld_d
+#define __msa_st_b __builtin_msa_st_b
+#define __msa_st_h __builtin_msa_st_h
+#define __msa_st_w __builtin_msa_st_w
+#define __msa_st_d __builtin_msa_st_d
+#define __msa_sat_s_b __builtin_msa_sat_s_b
+#define __msa_sat_s_h __builtin_msa_sat_s_h
+#define __msa_sat_s_w __builtin_msa_sat_s_w
+#define __msa_sat_s_d __builtin_msa_sat_s_d
+#define __msa_sat_u_b __builtin_msa_sat_u_b
+#define __msa_sat_u_h __builtin_msa_sat_u_h
+#define __msa_sat_u_w __builtin_msa_sat_u_w
+#define __msa_sat_u_d __builtin_msa_sat_u_d
+#define __msa_add_a_b __builtin_msa_add_a_b
+#define __msa_add_a_h __builtin_msa_add_a_h
+#define __msa_add_a_w __builtin_msa_add_a_w
+#define __msa_add_a_d __builtin_msa_add_a_d
+#define __msa_adds_a_b __builtin_msa_adds_a_b
+#define __msa_adds_a_h __builtin_msa_adds_a_h
+#define __msa_adds_a_w __builtin_msa_adds_a_w
+#define __msa_adds_a_d __builtin_msa_adds_a_d
+#define __msa_adds_s_b __builtin_msa_adds_s_b
+#define __msa_adds_s_h __builtin_msa_adds_s_h
+#define __msa_adds_s_w __builtin_msa_adds_s_w
+#define __msa_adds_s_d __builtin_msa_adds_s_d
+#define __msa_adds_u_b __builtin_msa_adds_u_b
+#define __msa_adds_u_h __builtin_msa_adds_u_h
+#define __msa_adds_u_w __builtin_msa_adds_u_w
+#define __msa_adds_u_d __builtin_msa_adds_u_d
+#define __msa_ave_s_b __builtin_msa_ave_s_b
+#define __msa_ave_s_h __builtin_msa_ave_s_h
+#define __msa_ave_s_w __builtin_msa_ave_s_w
+#define __msa_ave_s_d __builtin_msa_ave_s_d
+#define __msa_ave_u_b __builtin_msa_ave_u_b
+#define __msa_ave_u_h __builtin_msa_ave_u_h
+#define __msa_ave_u_w __builtin_msa_ave_u_w
+#define __msa_ave_u_d __builtin_msa_ave_u_d
+#define __msa_aver_s_b __builtin_msa_aver_s_b
+#define __msa_aver_s_h __builtin_msa_aver_s_h
+#define __msa_aver_s_w __builtin_msa_aver_s_w
+#define __msa_aver_s_d __builtin_msa_aver_s_d
+#define __msa_aver_u_b __builtin_msa_aver_u_b
+#define __msa_aver_u_h __builtin_msa_aver_u_h
+#define __msa_aver_u_w __builtin_msa_aver_u_w
+#define __msa_aver_u_d __builtin_msa_aver_u_d
+#define __msa_subs_s_b __builtin_msa_subs_s_b
+#define __msa_subs_s_h __builtin_msa_subs_s_h
+#define __msa_subs_s_w __builtin_msa_subs_s_w
+#define __msa_subs_s_d __builtin_msa_subs_s_d
+#define __msa_subs_u_b __builtin_msa_subs_u_b
+#define __msa_subs_u_h __builtin_msa_subs_u_h
+#define __msa_subs_u_w __builtin_msa_subs_u_w
+#define __msa_subs_u_d __builtin_msa_subs_u_d
+#define __msa_subsuu_s_b __builtin_msa_subsuu_s_b
+#define __msa_subsuu_s_h __builtin_msa_subsuu_s_h
+#define __msa_subsuu_s_w __builtin_msa_subsuu_s_w
+#define __msa_subsuu_s_d __builtin_msa_subsuu_s_d
+#define __msa_subsus_u_b __builtin_msa_subsus_u_b
+#define __msa_subsus_u_h __builtin_msa_subsus_u_h
+#define __msa_subsus_u_w __builtin_msa_subsus_u_w
+#define __msa_subsus_u_d __builtin_msa_subsus_u_d
+#define __msa_asub_s_b __builtin_msa_asub_s_b
+#define __msa_asub_s_h __builtin_msa_asub_s_h
+#define __msa_asub_s_w __builtin_msa_asub_s_w
+#define __msa_asub_s_d __builtin_msa_asub_s_d
+#define __msa_asub_u_b __builtin_msa_asub_u_b
+#define __msa_asub_u_h __builtin_msa_asub_u_h
+#define __msa_asub_u_w __builtin_msa_asub_u_w
+#define __msa_asub_u_d __builtin_msa_asub_u_d
+#define __msa_mulv_b __builtin_msa_mulv_b
+#define __msa_mulv_h __builtin_msa_mulv_h
+#define __msa_mulv_w __builtin_msa_mulv_w
+#define __msa_mulv_d __builtin_msa_mulv_d
+#define __msa_maddv_b __builtin_msa_maddv_b
+#define __msa_maddv_h __builtin_msa_maddv_h
+#define __msa_maddv_w __builtin_msa_maddv_w
+#define __msa_maddv_d __builtin_msa_maddv_d
+#define __msa_msubv_b __builtin_msa_msubv_b
+#define __msa_msubv_h __builtin_msa_msubv_h
+#define __msa_msubv_w __builtin_msa_msubv_w
+#define __msa_msubv_d __builtin_msa_msubv_d
+#define __msa_div_s_b __builtin_msa_div_s_b
+#define __msa_div_s_h __builtin_msa_div_s_h
+#define __msa_div_s_w __builtin_msa_div_s_w
+#define __msa_div_s_d __builtin_msa_div_s_d
+#define __msa_div_u_b __builtin_msa_div_u_b
+#define __msa_div_u_h __builtin_msa_div_u_h
+#define __msa_div_u_w __builtin_msa_div_u_w
+#define __msa_div_u_d __builtin_msa_div_u_d
+#define __msa_hadd_s_h __builtin_msa_hadd_s_h
+#define __msa_hadd_s_w __builtin_msa_hadd_s_w
+#define __msa_hadd_s_d __builtin_msa_hadd_s_d
+#define __msa_hadd_u_h __builtin_msa_hadd_u_h
+#define __msa_hadd_u_w __builtin_msa_hadd_u_w
+#define __msa_hadd_u_d __builtin_msa_hadd_u_d
+#define __msa_hsub_s_h __builtin_msa_hsub_s_h
+#define __msa_hsub_s_w __builtin_msa_hsub_s_w
+#define __msa_hsub_s_d __builtin_msa_hsub_s_d
+#define __msa_hsub_u_h __builtin_msa_hsub_u_h
+#define __msa_hsub_u_w __builtin_msa_hsub_u_w
+#define __msa_hsub_u_d __builtin_msa_hsub_u_d
+#define __msa_mod_s_b __builtin_msa_mod_s_b
+#define __msa_mod_s_h __builtin_msa_mod_s_h
+#define __msa_mod_s_w __builtin_msa_mod_s_w
+#define __msa_mod_s_d __builtin_msa_mod_s_d
+#define __msa_mod_u_b __builtin_msa_mod_u_b
+#define __msa_mod_u_h __builtin_msa_mod_u_h
+#define __msa_mod_u_w __builtin_msa_mod_u_w
+#define __msa_mod_u_d __builtin_msa_mod_u_d
+#define __msa_dotp_s_h __builtin_msa_dotp_s_h
+#define __msa_dotp_s_w __builtin_msa_dotp_s_w
+#define __msa_dotp_s_d __builtin_msa_dotp_s_d
+#define __msa_dotp_u_h __builtin_msa_dotp_u_h
+#define __msa_dotp_u_w __builtin_msa_dotp_u_w
+#define __msa_dotp_u_d __builtin_msa_dotp_u_d
+#define __msa_dpadd_s_h __builtin_msa_dpadd_s_h
+#define __msa_dpadd_s_w __builtin_msa_dpadd_s_w
+#define __msa_dpadd_s_d __builtin_msa_dpadd_s_d
+#define __msa_dpadd_u_h __builtin_msa_dpadd_u_h
+#define __msa_dpadd_u_w __builtin_msa_dpadd_u_w
+#define __msa_dpadd_u_d __builtin_msa_dpadd_u_d
+#define __msa_dpsub_s_h __builtin_msa_dpsub_s_h
+#define __msa_dpsub_s_w __builtin_msa_dpsub_s_w
+#define __msa_dpsub_s_d __builtin_msa_dpsub_s_d
+#define __msa_dpsub_u_h __builtin_msa_dpsub_u_h
+#define __msa_dpsub_u_w __builtin_msa_dpsub_u_w
+#define __msa_dpsub_u_d __builtin_msa_dpsub_u_d
+#define __msa_sld_b __builtin_msa_sld_b
+#define __msa_sld_h __builtin_msa_sld_h
+#define __msa_sld_w __builtin_msa_sld_w
+#define __msa_sld_d __builtin_msa_sld_d
+#define __msa_sldi_b __builtin_msa_sldi_b
+#define __msa_sldi_h __builtin_msa_sldi_h
+#define __msa_sldi_w __builtin_msa_sldi_w
+#define __msa_sldi_d __builtin_msa_sldi_d
+#define __msa_splat_b __builtin_msa_splat_b
+#define __msa_splat_h __builtin_msa_splat_h
+#define __msa_splat_w __builtin_msa_splat_w
+#define __msa_splat_d __builtin_msa_splat_d
+#define __msa_splati_b __builtin_msa_splati_b
+#define __msa_splati_h __builtin_msa_splati_h
+#define __msa_splati_w __builtin_msa_splati_w
+#define __msa_splati_d __builtin_msa_splati_d
+#define __msa_pckev_b __builtin_msa_pckev_b
+#define __msa_pckev_h __builtin_msa_pckev_h
+#define __msa_pckev_w __builtin_msa_pckev_w
+#define __msa_pckev_d __builtin_msa_pckev_d
+#define __msa_pckod_b __builtin_msa_pckod_b
+#define __msa_pckod_h __builtin_msa_pckod_h
+#define __msa_pckod_w __builtin_msa_pckod_w
+#define __msa_pckod_d __builtin_msa_pckod_d
+#define __msa_ilvl_b __builtin_msa_ilvl_b
+#define __msa_ilvl_h __builtin_msa_ilvl_h
+#define __msa_ilvl_w __builtin_msa_ilvl_w
+#define __msa_ilvl_d __builtin_msa_ilvl_d
+#define __msa_ilvr_b __builtin_msa_ilvr_b
+#define __msa_ilvr_h __builtin_msa_ilvr_h
+#define __msa_ilvr_w __builtin_msa_ilvr_w
+#define __msa_ilvr_d __builtin_msa_ilvr_d
+#define __msa_ilvev_b __builtin_msa_ilvev_b
+#define __msa_ilvev_h __builtin_msa_ilvev_h
+#define __msa_ilvev_w __builtin_msa_ilvev_w
+#define __msa_ilvev_d __builtin_msa_ilvev_d
+#define __msa_ilvod_b __builtin_msa_ilvod_b
+#define __msa_ilvod_h __builtin_msa_ilvod_h
+#define __msa_ilvod_w __builtin_msa_ilvod_w
+#define __msa_ilvod_d __builtin_msa_ilvod_d
+#define __msa_vshf_b __builtin_msa_vshf_b
+#define __msa_vshf_h __builtin_msa_vshf_h
+#define __msa_vshf_w __builtin_msa_vshf_w
+#define __msa_vshf_d __builtin_msa_vshf_d
+#define __msa_and_v __builtin_msa_and_v
+#define __msa_andi_b __builtin_msa_andi_b
+#define __msa_or_v __builtin_msa_or_v
+#define __msa_ori_b __builtin_msa_ori_b
+#define __msa_nor_v __builtin_msa_nor_v
+#define __msa_nori_b __builtin_msa_nori_b
+#define __msa_xor_v __builtin_msa_xor_v
+#define __msa_xori_b __builtin_msa_xori_b
+#define __msa_bmnz_v __builtin_msa_bmnz_v
+#define __msa_bmnzi_b __builtin_msa_bmnzi_b
+#define __msa_bmz_v __builtin_msa_bmz_v
+#define __msa_bmzi_b __builtin_msa_bmzi_b
+#define __msa_bsel_v __builtin_msa_bsel_v
+#define __msa_bseli_b __builtin_msa_bseli_b
+#define __msa_shf_b __builtin_msa_shf_b
+#define __msa_shf_h __builtin_msa_shf_h
+#define __msa_shf_w __builtin_msa_shf_w
+#define __msa_test_bnz_v __builtin_msa_bnz_v
+#define __msa_test_bz_v __builtin_msa_bz_v
+#define __msa_fill_b __builtin_msa_fill_b
+#define __msa_fill_h __builtin_msa_fill_h
+#define __msa_fill_w __builtin_msa_fill_w
+#define __msa_fill_d __builtin_msa_fill_d
+#define __msa_pcnt_b __builtin_msa_pcnt_b
+#define __msa_pcnt_h __builtin_msa_pcnt_h
+#define __msa_pcnt_w __builtin_msa_pcnt_w
+#define __msa_pcnt_d __builtin_msa_pcnt_d
+#define __msa_nloc_b __builtin_msa_nloc_b
+#define __msa_nloc_h __builtin_msa_nloc_h
+#define __msa_nloc_w __builtin_msa_nloc_w
+#define __msa_nloc_d __builtin_msa_nloc_d
+#define __msa_nlzc_b __builtin_msa_nlzc_b
+#define __msa_nlzc_h __builtin_msa_nlzc_h
+#define __msa_nlzc_w __builtin_msa_nlzc_w
+#define __msa_nlzc_d __builtin_msa_nlzc_d
+#define __msa_copy_s_b __builtin_msa_copy_s_b
+#define __msa_copy_s_h __builtin_msa_copy_s_h
+#define __msa_copy_s_w __builtin_msa_copy_s_w
+#define __msa_copy_s_d __builtin_msa_copy_s_d
+#define __msa_copy_u_b __builtin_msa_copy_u_b
+#define __msa_copy_u_h __builtin_msa_copy_u_h
+#define __msa_copy_u_w __builtin_msa_copy_u_w
+#define __msa_copy_u_d __builtin_msa_copy_u_d
+#define __msa_insert_b __builtin_msa_insert_b
+#define __msa_insert_h __builtin_msa_insert_h
+#define __msa_insert_w __builtin_msa_insert_w
+#define __msa_insert_d __builtin_msa_insert_d
+#define __msa_insve_b __builtin_msa_insve_b
+#define __msa_insve_h __builtin_msa_insve_h
+#define __msa_insve_w __builtin_msa_insve_w
+#define __msa_insve_d __builtin_msa_insve_d
+#define __msa_test_bnz_b __builtin_msa_bnz_b
+#define __msa_test_bnz_h __builtin_msa_bnz_h
+#define __msa_test_bnz_w __builtin_msa_bnz_w
+#define __msa_test_bnz_d __builtin_msa_bnz_d
+#define __msa_test_bz_b __builtin_msa_bz_b
+#define __msa_test_bz_h __builtin_msa_bz_h
+#define __msa_test_bz_w __builtin_msa_bz_w
+#define __msa_test_bz_d __builtin_msa_bz_d
+#define __msa_ldi_b __builtin_msa_ldi_b
+#define __msa_ldi_h __builtin_msa_ldi_h
+#define __msa_ldi_w __builtin_msa_ldi_w
+#define __msa_ldi_d __builtin_msa_ldi_d
+#define __msa_fcaf_w __builtin_msa_fcaf_w
+#define __msa_fcaf_d __builtin_msa_fcaf_d
+#define __msa_fcor_w __builtin_msa_fcor_w
+#define __msa_fcor_d __builtin_msa_fcor_d
+#define __msa_fcun_w __builtin_msa_fcun_w
+#define __msa_fcun_d __builtin_msa_fcun_d
+#define __msa_fcune_w __builtin_msa_fcune_w
+#define __msa_fcune_d __builtin_msa_fcune_d
+#define __msa_fcueq_w __builtin_msa_fcueq_w
+#define __msa_fcueq_d __builtin_msa_fcueq_d
+#define __msa_fceq_w __builtin_msa_fceq_w
+#define __msa_fceq_d __builtin_msa_fceq_d
+#define __msa_fcne_w __builtin_msa_fcne_w
+#define __msa_fcne_d __builtin_msa_fcne_d
+#define __msa_fclt_w __builtin_msa_fclt_w
+#define __msa_fclt_d __builtin_msa_fclt_d
+#define __msa_fcult_w __builtin_msa_fcult_w
+#define __msa_fcult_d __builtin_msa_fcult_d
+#define __msa_fcle_w __builtin_msa_fcle_w
+#define __msa_fcle_d __builtin_msa_fcle_d
+#define __msa_fcule_w __builtin_msa_fcule_w
+#define __msa_fcule_d __builtin_msa_fcule_d
+#define __msa_fsaf_w __builtin_msa_fsaf_w
+#define __msa_fsaf_d __builtin_msa_fsaf_d
+#define __msa_fsor_w __builtin_msa_fsor_w
+#define __msa_fsor_d __builtin_msa_fsor_d
+#define __msa_fsun_w __builtin_msa_fsun_w
+#define __msa_fsun_d __builtin_msa_fsun_d
+#define __msa_fsune_w __builtin_msa_fsune_w
+#define __msa_fsune_d __builtin_msa_fsune_d
+#define __msa_fsueq_w __builtin_msa_fsueq_w
+#define __msa_fsueq_d __builtin_msa_fsueq_d
+#define __msa_fseq_w __builtin_msa_fseq_w
+#define __msa_fseq_d __builtin_msa_fseq_d
+#define __msa_fsne_w __builtin_msa_fsne_w
+#define __msa_fsne_d __builtin_msa_fsne_d
+#define __msa_fslt_w __builtin_msa_fslt_w
+#define __msa_fslt_d __builtin_msa_fslt_d
+#define __msa_fsult_w __builtin_msa_fsult_w
+#define __msa_fsult_d __builtin_msa_fsult_d
+#define __msa_fsle_w __builtin_msa_fsle_w
+#define __msa_fsle_d __builtin_msa_fsle_d
+#define __msa_fsule_w __builtin_msa_fsule_w
+#define __msa_fsule_d __builtin_msa_fsule_d
+#define __msa_fadd_w __builtin_msa_fadd_w
+#define __msa_fadd_d __builtin_msa_fadd_d
+#define __msa_fsub_w __builtin_msa_fsub_w
+#define __msa_fsub_d __builtin_msa_fsub_d
+#define __msa_fmul_w __builtin_msa_fmul_w
+#define __msa_fmul_d __builtin_msa_fmul_d
+#define __msa_fdiv_w __builtin_msa_fdiv_w
+#define __msa_fdiv_d __builtin_msa_fdiv_d
+#define __msa_fmadd_w __builtin_msa_fmadd_w
+#define __msa_fmadd_d __builtin_msa_fmadd_d
+#define __msa_fmsub_w __builtin_msa_fmsub_w
+#define __msa_fmsub_d __builtin_msa_fmsub_d
+#define __msa_fexp2_w __builtin_msa_fexp2_w
+#define __msa_fexp2_d __builtin_msa_fexp2_d
+#define __msa_fexdo_h __builtin_msa_fexdo_h
+#define __msa_fexdo_w __builtin_msa_fexdo_w
+#define __msa_ftq_h __builtin_msa_ftq_h
+#define __msa_ftq_w __builtin_msa_ftq_w
+#define __msa_fmin_w __builtin_msa_fmin_w
+#define __msa_fmin_d __builtin_msa_fmin_d
+#define __msa_fmin_a_w __builtin_msa_fmin_a_w
+#define __msa_fmin_a_d __builtin_msa_fmin_a_d
+#define __msa_fmax_w __builtin_msa_fmax_w
+#define __msa_fmax_d __builtin_msa_fmax_d
+#define __msa_fmax_a_w __builtin_msa_fmax_a_w
+#define __msa_fmax_a_d __builtin_msa_fmax_a_d
+#define __msa_mul_q_h __builtin_msa_mul_q_h
+#define __msa_mul_q_w __builtin_msa_mul_q_w
+#define __msa_mulr_q_h __builtin_msa_mulr_q_h
+#define __msa_mulr_q_w __builtin_msa_mulr_q_w
+#define __msa_madd_q_h __builtin_msa_madd_q_h
+#define __msa_madd_q_w __builtin_msa_madd_q_w
+#define __msa_maddr_q_h __builtin_msa_maddr_q_h
+#define __msa_maddr_q_w __builtin_msa_maddr_q_w
+#define __msa_msub_q_h __builtin_msa_msub_q_h
+#define __msa_msub_q_w __builtin_msa_msub_q_w
+#define __msa_msubr_q_h __builtin_msa_msubr_q_h
+#define __msa_msubr_q_w __builtin_msa_msubr_q_w
+#define __msa_fclass_w __builtin_msa_fclass_w
+#define __msa_fclass_d __builtin_msa_fclass_d
+#define __msa_fsqrt_w __builtin_msa_fsqrt_w
+#define __msa_fsqrt_d __builtin_msa_fsqrt_d
+#define __msa_frcp_w __builtin_msa_frcp_w
+#define __msa_frcp_d __builtin_msa_frcp_d
+#define __msa_frint_w __builtin_msa_frint_w
+#define __msa_frint_d __builtin_msa_frint_d
+#define __msa_frsqrt_w __builtin_msa_frsqrt_w
+#define __msa_frsqrt_d __builtin_msa_frsqrt_d
+#define __msa_flog2_w __builtin_msa_flog2_w
+#define __msa_flog2_d __builtin_msa_flog2_d
+#define __msa_fexupl_w __builtin_msa_fexupl_w
+#define __msa_fexupl_d __builtin_msa_fexupl_d
+#define __msa_fexupr_w __builtin_msa_fexupr_w
+#define __msa_fexupr_d __builtin_msa_fexupr_d
+#define __msa_ffql_w __builtin_msa_ffql_w
+#define __msa_ffql_d __builtin_msa_ffql_d
+#define __msa_ffqr_w __builtin_msa_ffqr_w
+#define __msa_ffqr_d __builtin_msa_ffqr_d
+#define __msa_ftint_s_w __builtin_msa_ftint_s_w
+#define __msa_ftint_s_d __builtin_msa_ftint_s_d
+#define __msa_ftint_u_w __builtin_msa_ftint_u_w
+#define __msa_ftint_u_d __builtin_msa_ftint_u_d
+#define __msa_ftrunc_s_w __builtin_msa_ftrunc_s_w
+#define __msa_ftrunc_s_d __builtin_msa_ftrunc_s_d
+#define __msa_ftrunc_u_w __builtin_msa_ftrunc_u_w
+#define __msa_ftrunc_u_d __builtin_msa_ftrunc_u_d
+#define __msa_ffint_s_w __builtin_msa_ffint_s_w
+#define __msa_ffint_s_d __builtin_msa_ffint_s_d
+#define __msa_ffint_u_w __builtin_msa_ffint_u_w
+#define __msa_ffint_u_d __builtin_msa_ffint_u_d
+#define __msa_cfcmsa __builtin_msa_cfcmsa
+#define __msa_move_v __builtin_msa_move_v
+#endif /* defined(__mips_msa) */
+#endif /* _MSA_H */
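A short usage sketch for the new header (illustrative; assumes a
compiler built with this patch and invoked with -mmsa):

    #include <msa.h>

    /* Add two 4 x i32 vectors and extract lane 0, using the typedefs
       and intrinsic aliases defined in msa.h.  */
    int
    sum_lane0 (v4i32 a, v4i32 b)
    {
      v4i32 c = __msa_addv_w (a, b);
      return __msa_copy_s_w (c, 0);
    }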
diff --git a/gcc/config/mips/mti-elf.h b/gcc/config/mips/mti-elf.h
index e804f6ab645..c4ae24bac36 100644
--- a/gcc/config/mips/mti-elf.h
+++ b/gcc/config/mips/mti-elf.h
@@ -39,8 +39,8 @@ along with GCC; see the file COPYING3. If not see
\
/* If no FP ABI option is specified, infer one from the \
ABI/ISA level. */ \
- "%{!msoft-float: %{!msingle-float: %{!mfp*: %{mabi=32: %{" \
- MIPS_FPXX_OPTION_SPEC ": -mfpxx}}}}}", \
+ "%{!msoft-float: %{!msingle-float: %{!mfp*: %{!mmsa: %{mabi=32: %{" \
+ MIPS_FPXX_OPTION_SPEC ": -mfpxx}}}}}}", \
\
/* Make sure that an endian option is always present. This makes \
things like LINK_SPEC easier to write. */ \
diff --git a/gcc/config/mips/mti-linux.h b/gcc/config/mips/mti-linux.h
index d84ad1842b2..76b0f34059c 100644
--- a/gcc/config/mips/mti-linux.h
+++ b/gcc/config/mips/mti-linux.h
@@ -61,9 +61,9 @@ along with GCC; see the file COPYING3. If not see
"%{!mabi=*: %{" MIPS_32BIT_OPTION_SPEC ": -mabi=32;: -mabi=n32}}", \
\
/* If no FP ABI option is specified, infer one from the \
- ABI/ISA level. */ \
- "%{!msoft-float: %{!msingle-float: %{!mfp*: %{mabi=32: %{" \
- MIPS_FPXX_OPTION_SPEC ": -mfpxx}}}}}", \
+ ABI/ISA level unless there is a conflicting option. */ \
+ "%{!msoft-float: %{!msingle-float: %{!mfp*: %{!mmsa: %{mabi=32: %{" \
+ MIPS_FPXX_OPTION_SPEC ": -mfpxx}}}}}}", \
\
/* Base SPECs. */ \
BASE_DRIVER_SELF_SPECS \
diff --git a/gcc/config/mips/p5600.md b/gcc/config/mips/p5600.md
index 35e8749e8da..694a745a926 100644
--- a/gcc/config/mips/p5600.md
+++ b/gcc/config/mips/p5600.md
@@ -31,10 +31,15 @@
(define_cpu_unit "p5600_fpu_short, p5600_fpu_long" "p5600_fpu_pipe")
;; Short FPU pipeline
-(define_cpu_unit "p5600_fpu_store" "p5600_fpu_pipe")
+(define_cpu_unit "p5600_fpu_intadd, p5600_fpu_cmp, p5600_fpu_float,
+ p5600_fpu_logic_a, p5600_fpu_logic_b, p5600_fpu_div,
+ p5600_fpu_store" "p5600_fpu_pipe")
;; Long FPU pipeline
-(define_cpu_unit "p5600_fpu_apu" "p5600_fpu_pipe")
+(define_cpu_unit "p5600_fpu_logic, p5600_fpu_float_a, p5600_fpu_float_b,
+ p5600_fpu_float_c, p5600_fpu_float_d" "p5600_fpu_pipe")
+(define_cpu_unit "p5600_fpu_mult, p5600_fpu_fdiv, p5600_fpu_load,
+ p5600_fpu_apu" "p5600_fpu_pipe")
(define_reservation "p5600_agq_al2" "p5600_agq, p5600_al2")
(define_reservation "p5600_agq_ctistd" "p5600_agq, p5600_ctistd")
@@ -42,6 +47,116 @@
(define_reservation "p5600_alq_alu" "p5600_alq, p5600_alu")
;;
+;; FPU-MSA pipe
+;;
+
+;; Arithmetic
+;; add, hadd, sub, hsub, average, min, max, compare
+(define_insn_reservation "msa_short_int_add" 2
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_int_arith"))
+ "p5600_fpu_short, p5600_fpu_intadd")
+
+;; Bitwise Instructions
+;; and, or, xor, bit-clear, leading-bits-count, shift, shuffle
+(define_insn_reservation "msa_short_logic" 2
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_shift,simd_bit,simd_splat,simd_fill,simd_shf,
+ simd_permute,simd_logic"))
+ "p5600_fpu_short, p5600_fpu_logic_a")
+
+;; move.v
+(define_insn_reservation "msa_short_logic_move_v" 2
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_move"))
+ "p5600_fpu_short, p5600_fpu_logic_a")
+
+;; Float compare
+(define_insn_reservation "msa_short_cmp" 2
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_fcmp"))
+ "p5600_fpu_short, p5600_fpu_cmp")
+
+;; Float exp2, min, max
+(define_insn_reservation "msa_short_float2" 2
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_fexp2,simd_fminmax"))
+ "p5600_fpu_short, p5600_fpu_float")
+
+;; Vector sat, pcnt
+(define_insn_reservation "msa_short_logic3" 3
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_sat,simd_pcnt"))
+ "p5600_fpu_short, p5600_fpu_logic_a, p5600_fpu_logic_b")
+
+;; Vector copy, bz, bnz
+(define_insn_reservation "msa_short_store4" 4
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_copy,simd_branch,simd_cmsa"))
+ "p5600_fpu_short, p5600_fpu_store")
+
+;; Vector load
+(define_insn_reservation "msa_long_load" 10
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_load"))
+ "p5600_fpu_long, p5600_fpu_load")
+
+;; Vector store
+(define_insn_reservation "msa_short_store" 2
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_store"))
+ "p5600_fpu_short, p5600_fpu_store")
+
+;; binsl, binsr, insert, vshf, sld
+(define_insn_reservation "msa_long_logic" 2
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_bitins,simd_bitmov,simd_insert,simd_sld"))
+ "p5600_fpu_long, p5600_fpu_logic")
+
+;; Float fclass, flog2
+(define_insn_reservation "msa_long_float2" 2
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_fclass,simd_flog2"))
+ "p5600_fpu_long, p5600_fpu_float_a")
+
+;; fadd, fsub, fcvt
+(define_insn_reservation "msa_long_float4" 4
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_fadd,simd_fcvt"))
+ "p5600_fpu_long, p5600_fpu_float_a, p5600_fpu_float_b")
+
+;; fmul
+(define_insn_reservation "msa_long_float5" 5
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_fmul"))
+ "p5600_fpu_long, p5600_fpu_float_a, p5600_fpu_float_b, p5600_fpu_float_c")
+
+;; fmadd, fmsub
+(define_insn_reservation "msa_long_float8" 8
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_fmadd"))
+ "p5600_fpu_long, p5600_fpu_float_a,
+ p5600_fpu_float_b, p5600_fpu_float_c, p5600_fpu_float_d")
+
+;; Vector mul, dotp, madd, msub
+(define_insn_reservation "msa_long_mult" 5
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_mul"))
+ "p5600_fpu_long, p5600_fpu_mult")
+
+;; fdiv, fmod (semi-pipelined)
+(define_insn_reservation "msa_long_fdiv" 10
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_fdiv"))
+ "p5600_fpu_long, nothing, nothing, p5600_fpu_fdiv*8")
+
+;; div, mod (non-pipelined)
+(define_insn_reservation "msa_long_div" 10
+ (and (eq_attr "cpu" "p5600")
+ (eq_attr "type" "simd_div"))
+ "p5600_fpu_long, p5600_fpu_div*9, p5600_fpu_div + p5600_fpu_logic_a")
+
+;;
;; FPU pipe
;;
diff --git a/gcc/config/mips/predicates.md b/gcc/config/mips/predicates.md
index cbeace9d640..e6b6d2f60da 100644
--- a/gcc/config/mips/predicates.md
+++ b/gcc/config/mips/predicates.md
@@ -35,12 +35,36 @@
(define_predicate "const_immlsa_operand"
(and (match_code "const_int")
- (match_test "IN_RANGE (INTVAL (op), 1, 4)")))
+ (match_test "IN_RANGE (INTVAL (op), 1, 4)")))
+
+(define_predicate "const_msa_branch_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), -1024, 1023)")))
+
+(define_predicate "const_uimm3_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+
+(define_predicate "const_uimm4_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 15)")))
+
+(define_predicate "const_uimm5_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 31)")))
(define_predicate "const_uimm6_operand"
(and (match_code "const_int")
(match_test "UIMM6_OPERAND (INTVAL (op))")))
+(define_predicate "const_uimm8_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 255)")))
+
+(define_predicate "const_imm5_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), -16, 15)")))
+
(define_predicate "const_imm10_operand"
(and (match_code "const_int")
(match_test "IMM10_OPERAND (INTVAL (op))")))
@@ -49,6 +73,22 @@
(ior (match_operand 0 "const_imm10_operand")
(match_operand 0 "register_operand")))
+(define_predicate "aq10b_operand"
+ (and (match_code "const_int")
+ (match_test "mips_signed_immediate_p (INTVAL (op), 10, 0)")))
+
+(define_predicate "aq10h_operand"
+ (and (match_code "const_int")
+ (match_test "mips_signed_immediate_p (INTVAL (op), 10, 1)")))
+
+(define_predicate "aq10w_operand"
+ (and (match_code "const_int")
+ (match_test "mips_signed_immediate_p (INTVAL (op), 10, 2)")))
+
+(define_predicate "aq10d_operand"
+ (and (match_code "const_int")
+ (match_test "mips_signed_immediate_p (INTVAL (op), 10, 3)")))
+
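The four aq10* predicates accept a signed 10-bit immediate scaled by
the element size (shift 0, 1, 2 or 3), matching the MSA ld/st offset
encoding. A standalone sketch of the check, assuming
mips_signed_immediate_p has its usual meaning (its definition is not
part of this hunk):

    #include <stdbool.h>

    /* VALUE must be a multiple of 1 << SHIFT and, once scaled down,
       must fit in NBITS signed bits.  */
    static bool
    signed_immediate_p (long long value, int nbits, int shift)
    {
      if (value & ((1LL << shift) - 1))
        return false;
      value >>= shift;
      return -(1LL << (nbits - 1)) <= value
             && value < (1LL << (nbits - 1));
    }

    /* aq10w (shift 2) therefore accepts -2048 .. 2044 in steps of 4.  */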
(define_predicate "sle_operand"
(and (match_code "const_int")
(match_test "SMALL_OPERAND (INTVAL (op) + 1)")))
@@ -61,6 +101,14 @@
(and (match_code "const_int,const_double,const_vector")
(match_test "op == CONST0_RTX (GET_MODE (op))")))
+(define_predicate "const_m1_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONSTM1_RTX (GET_MODE (op))")))
+
+(define_predicate "reg_or_m1_operand"
+ (ior (match_operand 0 "const_m1_operand")
+ (match_operand 0 "register_operand")))
+
(define_predicate "reg_or_0_operand"
(ior (and (match_operand 0 "const_0_operand")
(not (match_test "TARGET_MIPS16")))
@@ -74,6 +122,23 @@
(ior (match_operand 0 "const_1_operand")
(match_operand 0 "register_operand")))
+;; These are used in vec_merge, hence accept a bitmask as a const_int.
+(define_predicate "const_exp_2_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 1)")))
+
+(define_predicate "const_exp_4_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 3)")))
+
+(define_predicate "const_exp_8_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 7)")))
+
+(define_predicate "const_exp_16_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (exact_log2 (INTVAL (op)), 0, 15)")))
+
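Each const_exp_N_operand accepts only a power of two no greater than N,
i.e. a vec_merge mask with exactly one lane selected. A sketch of the
underlying test (a portable stand-in for GCC's exact_log2):

    /* Returns log2 (x) if x is a power of two, -1 otherwise.  */
    static int
    exact_log2_c (unsigned long long x)
    {
      return (x != 0 && (x & (x - 1)) == 0) ? __builtin_ctzll (x) : -1;
    }

    /* const_exp_4_operand then reduces to 0 <= exact_log2_c (v) <= 3,
       so v must be 1, 2, 4 or 8: a single-bit mask for a 4-element
       vector.  */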
;; This is used for indexing into vectors, and hence only accepts const_int.
(define_predicate "const_0_or_1_operand"
(and (match_code "const_int")
@@ -507,3 +572,65 @@
(define_predicate "non_volatile_mem_operand"
(and (match_operand 0 "memory_operand")
(not (match_test "MEM_VOLATILE_P (op)"))))
+
+(define_predicate "const_vector_same_val_operand"
+ (match_code "const_vector")
+{
+ return mips_const_vector_same_val_p (op, mode);
+})
+
+(define_predicate "const_vector_same_simm5_operand"
+ (match_code "const_vector")
+{
+ return mips_const_vector_same_int_p (op, mode, -16, 15);
+})
+
+(define_predicate "const_vector_same_uimm5_operand"
+ (match_code "const_vector")
+{
+ return mips_const_vector_same_int_p (op, mode, 0, 31);
+})
+
+(define_predicate "const_vector_same_ximm5_operand"
+ (match_code "const_vector")
+{
+ return mips_const_vector_same_int_p (op, mode, -31, 31);
+})
+
+(define_predicate "const_vector_same_uimm6_operand"
+ (match_code "const_vector")
+{
+ return mips_const_vector_same_int_p (op, mode, 0, 63);
+})
+
+(define_predicate "const_vector_same_uimm8_operand"
+ (match_code "const_vector")
+{
+ return mips_const_vector_same_int_p (op, mode, 0, 255);
+})
+
+(define_predicate "par_const_vector_shf_set_operand"
+ (match_code "parallel")
+{
+ return mips_const_vector_shuffle_set_p (op, mode);
+})
+
+(define_predicate "reg_or_vector_same_val_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const_vector_same_val_operand")))
+
+(define_predicate "reg_or_vector_same_simm5_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const_vector_same_simm5_operand")))
+
+(define_predicate "reg_or_vector_same_uimm5_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const_vector_same_uimm5_operand")))
+
+(define_predicate "reg_or_vector_same_ximm5_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const_vector_same_ximm5_operand")))
+
+(define_predicate "reg_or_vector_same_uimm6_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const_vector_same_uimm6_operand")))
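The const_vector_same_* predicates match CONST_VECTORs whose lanes all
hold the same value within the stated range, i.e. splat constants small
enough for MSA's immediate instruction forms. An illustrative C-level
view (the instruction-selection outcome is an assumption, not taken
from this hunk):

    typedef int v4i32 __attribute__ ((vector_size (16)));

    v4i32
    fits_simm5 (v4i32 a)
    {
      return a + 7;    /* splat of 7 is in [-16, 15]: immediate form */
    }

    v4i32
    needs_register (v4i32 a)
    {
      return a + 100;  /* 100 exceeds simm5: register operand instead */
    }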
diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c
index 13c22539268..2d850610ea6 100644
--- a/gcc/config/rl78/rl78.c
+++ b/gcc/config/rl78/rl78.c
@@ -76,6 +76,23 @@ static const char * const word_regnames[] =
"sp", "ap", "psw", "es", "cs"
};
+/* Structure for G13 MDUC registers. */
+struct mduc_reg_type
+{
+ unsigned int address;
+ enum machine_mode mode;
+};
+
+struct mduc_reg_type mduc_regs[] =
+{
+ {0xf00e8, QImode},
+ {0xffff0, HImode},
+ {0xffff2, HImode},
+ {0xf2224, HImode},
+ {0xf00e0, HImode},
+ {0xf00e2, HImode}
+};
+
struct GTY(()) machine_function
{
/* If set, the rest of the fields have been computed. */
@@ -317,6 +334,10 @@ rl78_output_symbol_ref (FILE * file, rtx sym)
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rl78_option_override
+#define MUST_SAVE_MDUC_REGISTERS \
+ (TARGET_SAVE_MDUC_REGISTERS \
+ && (is_interrupt_func (NULL_TREE)) && RL78_MUL_G13)
+
static void
rl78_option_override (void)
{
@@ -344,6 +365,9 @@ rl78_option_override (void)
/* Address spaces are currently only supported by C. */
error ("-mes0 can only be used with C");
+ if (TARGET_SAVE_MDUC_REGISTERS && !(TARGET_G13 || RL78_MUL_G13))
+ warning (0, "mduc registers only saved for G13 target");
+
switch (rl78_cpu_type)
{
case CPU_UNINIT:
@@ -1257,13 +1281,34 @@ rl78_initial_elimination_offset (int from, int to)
return rv;
}
-static int
+static bool
rl78_is_naked_func (void)
{
return (lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) != NULL_TREE);
}
+/* Check if the current function uses mul/div insns for the G13 target. */
+
+static bool
+check_mduc_usage (void)
+{
+ rtx_insn * insn;
+ basic_block bb;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (INSN_P (insn)
+ && (get_attr_is_g13_muldiv_insn (insn) == IS_G13_MULDIV_INSN_YES))
+ return true;
+ }
+ }
+ return false;
+}
+
/* Expand the function prologue (from the prologue pattern). */
+
void
rl78_expand_prologue (void)
{
@@ -1278,6 +1323,9 @@ rl78_expand_prologue (void)
/* Always re-compute the frame info - the register usage may have changed. */
rl78_compute_frame_info ();
+ if (MUST_SAVE_MDUC_REGISTERS && (!crtl->is_leaf || check_mduc_usage ()))
+ cfun->machine->framesize += ARRAY_SIZE (mduc_regs) * 2;
+
if (flag_stack_usage_info)
current_function_static_stack_size = cfun->machine->framesize;
@@ -1327,6 +1375,24 @@ rl78_expand_prologue (void)
F (emit_insn (gen_push (ax)));
}
+ /* Save the MDUC registers inside an interrupt routine. */
+ if (MUST_SAVE_MDUC_REGISTERS && (!crtl->is_leaf || check_mduc_usage ()))
+ {
+ for (unsigned int i = 0; i < ARRAY_SIZE (mduc_regs); i++)
+ {
+ mduc_reg_type *reg = mduc_regs + i;
+ rtx mem_mduc = gen_rtx_MEM (reg->mode, GEN_INT (reg->address));
+
+ MEM_VOLATILE_P (mem_mduc) = 1;
+ if (reg->mode == QImode)
+ emit_insn (gen_movqi (gen_rtx_REG (QImode, A_REG), mem_mduc));
+ else
+ emit_insn (gen_movhi (gen_rtx_REG (HImode, AX_REG), mem_mduc));
+
+ emit_insn (gen_push (gen_rtx_REG (HImode, AX_REG)));
+ }
+ }
+
if (frame_pointer_needed)
{
F (emit_move_insn (ax, sp));
@@ -1400,6 +1466,23 @@ rl78_expand_epilogue (void)
}
}
+ /* Restore the MDUC registers on exit from an interrupt routine. */
+ if (MUST_SAVE_MDUC_REGISTERS && (!crtl->is_leaf || check_mduc_usage ()))
+ {
+ for (int i = ARRAY_SIZE (mduc_regs) - 1; i >= 0; i--)
+ {
+ mduc_reg_type *reg = mduc_regs + i;
+ rtx mem_mduc = gen_rtx_MEM (reg->mode, GEN_INT (reg->address));
+
+ emit_insn (gen_pop (gen_rtx_REG (HImode, AX_REG)));
+ MEM_VOLATILE_P (mem_mduc) = 1;
+ if (reg->mode == QImode)
+ emit_insn (gen_movqi (mem_mduc, gen_rtx_REG (QImode, A_REG)));
+ else
+ emit_insn (gen_movhi (mem_mduc, gen_rtx_REG (HImode, AX_REG)));
+ }
+ }
+
if (is_interrupt_func (cfun->decl) && cfun->machine->uses_es)
{
emit_insn (gen_pop (gen_rtx_REG (HImode, AX_REG)));
@@ -1495,6 +1578,9 @@ rl78_start_function (FILE *file, HOST_WIDE_INT hwi_local ATTRIBUTE_UNUSED)
if (cfun->machine->uses_es)
fprintf (file, "\t; uses ES register\n");
+
+ if (MUST_SAVE_MDUC_REGISTERS)
+ fprintf (file, "\t; preserves MDUC registers\n");
}
/* Return an RTL describing where a function return value of type RET_TYPE
diff --git a/gcc/config/rl78/rl78.md b/gcc/config/rl78/rl78.md
index 739f6057b92..33bd1988537 100644
--- a/gcc/config/rl78/rl78.md
+++ b/gcc/config/rl78/rl78.md
@@ -67,6 +67,7 @@
(include "rl78-virt.md")
(include "rl78-real.md")
+(define_attr "is_g13_muldiv_insn" "yes,no" (const_string "no"))
;; Function Prologue/Epilogue Instructions
@@ -379,7 +380,8 @@
movw ax, 0xffff6 ; MDBL
movw %h0, ax
; end of mulhi macro"
- [(set_attr "valloc" "macax")]
+ [(set_attr "valloc" "macax")
+ (set_attr "is_g13_muldiv_insn" "yes")]
)
;; 0xFFFF0 is MACR(L). 0xFFFF2 is MACR(H) but we don't care about it
@@ -459,7 +461,8 @@
movw ax, !0xf00e0 ; MDCL
movw %H0, ax
; end of mulsi macro"
- [(set_attr "valloc" "macax")]
+ [(set_attr "valloc" "macax")
+ (set_attr "is_g13_muldiv_insn" "yes")]
)
(define_expand "udivmodhi4"
@@ -692,5 +695,6 @@
movw %H3, ax \n\
; end of udivmodsi macro";
}
- [(set_attr "valloc" "macax")]
+ [(set_attr "valloc" "macax")
+ (set_attr "is_g13_muldiv_insn" "yes")]
)
diff --git a/gcc/config/rl78/rl78.opt b/gcc/config/rl78/rl78.opt
index a8e53ee0735..26db67cb658 100644
--- a/gcc/config/rl78/rl78.opt
+++ b/gcc/config/rl78/rl78.opt
@@ -91,3 +91,7 @@ Alias for -mcpu=g14.
mes0
Target Mask(ES0)
Assume ES is zero throughout program execution, use ES: for read-only data.
+
+msave-mduc-in-interrupts
+Target Mask(SAVE_MDUC_REGISTERS)
+Store the MDUC registers in interrupt handlers for the G13 target.
diff --git a/gcc/config/rs6000/freebsd64.h b/gcc/config/rs6000/freebsd64.h
index 899b858d821..3038c43b25f 100644
--- a/gcc/config/rs6000/freebsd64.h
+++ b/gcc/config/rs6000/freebsd64.h
@@ -349,7 +349,7 @@ extern int dot_symbols;
true if the symbol may be affected by dynamic relocations. */
#undef ASM_PREFERRED_EH_DATA_FORMAT
#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
- ((TARGET_64BIT || flag_pic || TARGET_RELOCATABLE) \
+ (TARGET_64BIT || flag_pic \
? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel \
| (TARGET_64BIT ? DW_EH_PE_udata8 : DW_EH_PE_sdata4)) \
: DW_EH_PE_absptr)
@@ -384,7 +384,7 @@ extern int dot_symbols;
#define MINIMAL_TOC_SECTION_ASM_OP \
(TARGET_64BIT \
? "\t.section\t\".toc1\",\"aw\"" \
- : ((TARGET_RELOCATABLE || flag_pic) \
+ : (flag_pic \
? "\t.section\t\".got2\",\"aw\"" \
: "\t.section\t\".got1\",\"aw\""))
@@ -422,7 +422,6 @@ extern int dot_symbols;
&& ! TARGET_NO_FP_IN_TOC))) \
|| (!TARGET_64BIT \
&& !TARGET_NO_FP_IN_TOC \
- && !TARGET_RELOCATABLE \
&& SCALAR_FLOAT_MODE_P (GET_MODE (X)) \
&& BITS_PER_WORD == HOST_BITS_PER_INT)))))
diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h
index fefa0c4eef3..e86b5d52ad6 100644
--- a/gcc/config/rs6000/linux64.h
+++ b/gcc/config/rs6000/linux64.h
@@ -489,7 +489,7 @@ extern int dot_symbols;
#define MINIMAL_TOC_SECTION_ASM_OP \
(TARGET_64BIT \
? "\t.section\t\".toc1\",\"aw\"" \
- : ((TARGET_RELOCATABLE || flag_pic) \
+ : (flag_pic \
? "\t.section\t\".got2\",\"aw\"" \
: "\t.section\t\".got1\",\"aw\""))
@@ -585,7 +585,6 @@ extern int dot_symbols;
&& ! TARGET_NO_FP_IN_TOC))) \
|| (!TARGET_64BIT \
&& !TARGET_NO_FP_IN_TOC \
- && !TARGET_RELOCATABLE \
&& SCALAR_FLOAT_MODE_P (GET_MODE (X)) \
&& BITS_PER_WORD == HOST_BITS_PER_INT)))))
@@ -594,7 +593,7 @@ extern int dot_symbols;
true if the symbol may be affected by dynamic relocations. */
#undef ASM_PREFERRED_EH_DATA_FORMAT
#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
- ((TARGET_64BIT || flag_pic || TARGET_RELOCATABLE) \
+ (TARGET_64BIT || flag_pic \
? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel \
| (TARGET_64BIT ? DW_EH_PE_udata8 : DW_EH_PE_sdata4)) \
: DW_EH_PE_absptr)
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 71fac765e24..3b40e3ad953 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -539,13 +539,6 @@
if (flag_pic && DEFAULT_ABI == ABI_V4)
return 0;
-#ifdef TARGET_RELOCATABLE
- /* Similarly if we are using -mrelocatable, consider all constants
- to be hard. */
- if (TARGET_RELOCATABLE)
- return 0;
-#endif
-
/* If we have real FPRs, consider floating point constants hard (other than
0.0 under VSX), so that the constant gets pushed to memory during the
early RTL phases. This has the advantage that double precision constants
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 61d39242b39..34495f3c99d 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1755,6 +1755,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
+#undef TARGET_SCHED_REASSOCIATION_WIDTH
+#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
+
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
@@ -8633,6 +8636,40 @@ rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
true, worst_case);
}
+/* Determine the reassociation width to be used in reassociate_bb.
+ This takes into account how many parallel operations we
+ can actually do of a given type, and also the latency.
+ P8:
+ int add/sub 6/cycle
+ mul 2/cycle
+ vect add/sub/mul 2/cycle
+ fp add/sub/mul 2/cycle
+ dfp 1/cycle. */
+
+static int
+rs6000_reassociation_width (unsigned int opc, enum machine_mode mode)
+{
+ switch (rs6000_cpu)
+ {
+ case PROCESSOR_POWER8:
+ case PROCESSOR_POWER9:
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return 1;
+ if (VECTOR_MODE_P (mode))
+ return 4;
+ if (INTEGRAL_MODE_P (mode))
+ return opc == MULT_EXPR ? 4 : 6;
+ if (FLOAT_MODE_P (mode))
+ return 4;
+ break;
+ default:
+ break;
+ }
+ return 1;
+}
+
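To illustrate what a width above 1 buys: with a width of 4 on POWER8,
the reassociation pass may rebalance a serial floating-point reduction
into independent partial sums (sketch; the exact shaping is up to the
pass, and FP reassociation needs -ffast-math):

    double
    sum8 (const double *a)
    {
      /* Written serially this is a chain of 7 dependent adds.  With a
         reassociation width of 4 it can be regrouped roughly as
         ((a0+a1) + (a2+a3)) + ((a4+a5) + (a6+a7)), exposing
         independent adds to the parallel FP pipelines.  */
      return a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7];
    }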
/* Change register usage conditional on target flags. */
static void
rs6000_conditional_register_usage (void)
@@ -20665,7 +20702,8 @@ rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
don't need to mark it here. We used to skip the text section, but it
should never be valid for relocated addresses to be placed in the text
section. */
- if (TARGET_RELOCATABLE
+ if (DEFAULT_ABI == ABI_V4
+ && (TARGET_RELOCATABLE || flag_pic > 1)
&& in_section != toc_section
&& !recurse
&& !CONST_SCALAR_INT_P (x)
@@ -23422,6 +23460,15 @@ rs6000_savres_strategy (rs6000_stack_t *info,
}
}
+ /* info->lr_save_p isn't yet set if the only reason lr needs to be
+ saved is an out-of-line save or restore. Set up the value for
+ the next test (excluding out-of-line gprs). */
+ bool lr_save_p = (info->lr_save_p
+ || !(strategy & SAVE_INLINE_FPRS)
+ || !(strategy & SAVE_INLINE_VRS)
+ || !(strategy & REST_INLINE_FPRS)
+ || !(strategy & REST_INLINE_VRS));
+
if (TARGET_MULTIPLE
&& !TARGET_POWERPC64
&& !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
@@ -23431,15 +23478,6 @@ rs6000_savres_strategy (rs6000_stack_t *info,
since the store-multiple instruction will always be smaller. */
strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
- /* info->lr_save_p isn't yet set if the only reason lr needs to be
- saved is an out-of-line save or restore. Set up the value for
- the next test (excluding out-of-line gprs). */
- bool lr_save_p = (info->lr_save_p
- || !(strategy & SAVE_INLINE_FPRS)
- || !(strategy & SAVE_INLINE_VRS)
- || !(strategy & REST_INLINE_FPRS)
- || !(strategy & REST_INLINE_VRS));
-
/* The situation is more complicated with load multiple. We'd
prefer to use the out-of-line routines for restores, since the
"exit" out-of-line routines can handle the restore of LR and the
@@ -23452,6 +23490,12 @@ rs6000_savres_strategy (rs6000_stack_t *info,
strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
}
+ /* Using the "exit" out-of-line routine does not improve code size
+ if it would require LR to be saved and only one or two GPRs are
+ being saved. */
+ else if (!lr_save_p && info->first_gp_reg_save > 29)
+ strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
+
/* We can only use load multiple or the out-of-line routines to
restore gprs if we've saved all the registers from
first_gp_reg_save. Otherwise, we risk loading garbage.
@@ -23689,9 +23733,9 @@ rs6000_stack_info (void)
info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
/* Determine if we need to save the condition code registers. */
- if (df_regs_ever_live_p (CR2_REGNO)
- || df_regs_ever_live_p (CR3_REGNO)
- || df_regs_ever_live_p (CR4_REGNO))
+ if (save_reg_p (CR2_REGNO)
+ || save_reg_p (CR3_REGNO)
+ || save_reg_p (CR4_REGNO))
{
info->cr_save_p = 1;
if (DEFAULT_ABI == ABI_V4)
@@ -23856,7 +23900,9 @@ rs6000_stack_info (void)
&& !TARGET_PROFILE_KERNEL)
|| (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
#ifdef TARGET_RELOCATABLE
- || (TARGET_RELOCATABLE && (get_pool_size () != 0))
+ || (DEFAULT_ABI == ABI_V4
+ && (TARGET_RELOCATABLE || flag_pic > 1)
+ && get_pool_size () != 0)
#endif
|| rs6000_ra_ever_killed ())
info->lr_save_p = 1;
@@ -24712,7 +24758,7 @@ output_probe_stack_range (rtx reg1, rtx reg2)
}
/* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
- with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
+ with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
is not NULL. It would be nice if dwarf2out_frame_debug_expr could
deduce these equivalences by itself so it wasn't necessary to hold
its hand so much. Don't be tempted to always supply d2_f_d_e with
@@ -24722,22 +24768,28 @@ output_probe_stack_range (rtx reg1, rtx reg2)
static rtx
rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
- rtx reg2, rtx rreg)
+ rtx reg2, rtx repl2)
{
- rtx real, temp;
+ rtx repl;
- if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
+ if (REGNO (reg) == STACK_POINTER_REGNUM)
{
- /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
- int i;
-
gcc_checking_assert (val == 0);
- real = PATTERN (insn);
- if (GET_CODE (real) == PARALLEL)
- for (i = 0; i < XVECLEN (real, 0); i++)
- if (GET_CODE (XVECEXP (real, 0, i)) == SET)
+ repl = NULL_RTX;
+ }
+ else
+ repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
+ GEN_INT (val));
+
+ rtx pat = PATTERN (insn);
+ if (!repl && !reg2)
+ {
+ /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
+ if (GET_CODE (pat) == PARALLEL)
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
{
- rtx set = XVECEXP (real, 0, i);
+ rtx set = XVECEXP (pat, 0, i);
/* If this PARALLEL has been emitted for out-of-line
register save functions, or store multiple, then omit
@@ -24752,79 +24804,47 @@ rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
return insn;
}
- /* copy_rtx will not make unique copies of registers, so we need to
- ensure we don't have unwanted sharing here. */
- if (reg == reg2)
- reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
-
- if (reg == rreg)
- reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
-
- real = copy_rtx (PATTERN (insn));
-
- if (reg2 != NULL_RTX)
- real = replace_rtx (real, reg2, rreg);
-
- if (REGNO (reg) == STACK_POINTER_REGNUM)
- gcc_checking_assert (val == 0);
- else
- real = replace_rtx (real, reg,
- gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
- STACK_POINTER_REGNUM),
- GEN_INT (val)));
-
- /* We expect that 'real' is either a SET or a PARALLEL containing
+ /* We expect that 'pat' is either a SET or a PARALLEL containing
SETs (and possibly other stuff). In a PARALLEL, all the SETs
- are important so they all have to be marked RTX_FRAME_RELATED_P. */
+ are important so they all have to be marked RTX_FRAME_RELATED_P.
+ Call simplify_replace_rtx on the SETs rather than the whole insn
+ so as to leave the other stuff alone (for example USE of r12). */
- if (GET_CODE (real) == SET)
+ if (GET_CODE (pat) == SET)
{
- rtx set = real;
-
- temp = simplify_rtx (SET_SRC (set));
- if (temp)
- SET_SRC (set) = temp;
- temp = simplify_rtx (SET_DEST (set));
- if (temp)
- SET_DEST (set) = temp;
- if (GET_CODE (SET_DEST (set)) == MEM)
- {
- temp = simplify_rtx (XEXP (SET_DEST (set), 0));
- if (temp)
- XEXP (SET_DEST (set), 0) = temp;
- }
+ if (repl)
+ pat = simplify_replace_rtx (pat, reg, repl);
+ if (reg2)
+ pat = simplify_replace_rtx (pat, reg2, repl2);
}
- else
+ else if (GET_CODE (pat) == PARALLEL)
{
- int i;
+ pat = shallow_copy_rtx (pat);
+ XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
- gcc_assert (GET_CODE (real) == PARALLEL);
- for (i = 0; i < XVECLEN (real, 0); i++)
- if (GET_CODE (XVECEXP (real, 0, i)) == SET)
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
{
- rtx set = XVECEXP (real, 0, i);
-
- temp = simplify_rtx (SET_SRC (set));
- if (temp)
- SET_SRC (set) = temp;
- temp = simplify_rtx (SET_DEST (set));
- if (temp)
- SET_DEST (set) = temp;
- if (GET_CODE (SET_DEST (set)) == MEM)
- {
- temp = simplify_rtx (XEXP (SET_DEST (set), 0));
- if (temp)
- XEXP (SET_DEST (set), 0) = temp;
- }
+ rtx set = XVECEXP (pat, 0, i);
+
+ if (repl)
+ set = simplify_replace_rtx (set, reg, repl);
+ if (reg2)
+ set = simplify_replace_rtx (set, reg2, repl2);
+ XVECEXP (pat, 0, i) = set;
+
/* Omit eh_frame info for any user-defined global regs. */
if (!REG_P (SET_SRC (set))
|| !fixed_reg_p (REGNO (SET_SRC (set))))
RTX_FRAME_RELATED_P (set) = 1;
}
}
+ else
+ gcc_unreachable ();
RTX_FRAME_RELATED_P (insn) = 1;
- add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
+ if (repl || reg2)
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, pat);
return insn;
}
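[The rewrite above leans on simplify_replace_rtx to build the REG_FRAME_RELATED_EXPR note: each use of the frame register is rewritten to sp+offset while untouched parts of the pattern stay shared. A miniature model of that copy-on-write substitution in plain C, using a toy expression tree rather than RTL:]

#include <stdio.h>
#include <stdlib.h>

typedef enum { REG, CST, PLUS } kind_t;
typedef struct expr { kind_t k; int val; struct expr *a, *b; } expr;

static expr *
mk (kind_t k, int v, expr *a, expr *b)
{
  expr *e = (expr *) malloc (sizeof *e);
  e->k = k; e->val = v; e->a = a; e->b = b;
  return e;
}

/* Replace register FROM with TO, copying only the nodes that change,
   the way simplify_replace_rtx shares untouched subexpressions.  */
static expr *
replace_reg (expr *e, int from, expr *to)
{
  if (e->k == REG)
    return e->val == from ? to : e;
  if (e->k == CST)
    return e;
  expr *a = replace_reg (e->a, from, to);
  expr *b = replace_reg (e->b, from, to);
  return (a == e->a && b == e->b) ? e : mk (PLUS, 0, a, b);
}

static void
show (const expr *e)
{
  if (e->k == REG)
    printf ("(reg %d)", e->val);
  else if (e->k == CST)
    printf ("%d", e->val);
  else
    {
      printf ("(plus ");
      show (e->a);
      printf (" ");
      show (e->b);
      printf (")");
    }
}

int
main (void)
{
  /* (plus (reg 11) 8) with r11 = sp - 32 becomes
     (plus (plus (reg 1) -32) 8).  */
  expr *note = mk (PLUS, 0, mk (REG, 11, NULL, NULL), mk (CST, 8, NULL, NULL));
  expr *repl = mk (PLUS, 0, mk (REG, 1, NULL, NULL), mk (CST, -32, NULL, NULL));
  show (replace_reg (note, 11, repl));
  printf ("\n");
  return 0;
}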
@@ -31334,13 +31354,12 @@ static void
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
- && TARGET_MINIMAL_TOC
- && !TARGET_RELOCATABLE)
+ && TARGET_MINIMAL_TOC)
{
if (!toc_initialized)
{
- toc_initialized = 1;
fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
+ ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
(*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
fprintf (asm_out_file, "\t.tc ");
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
@@ -31348,20 +31367,29 @@ rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
fprintf (asm_out_file, "\n");
fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
+ ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
fprintf (asm_out_file, " = .+32768\n");
+ toc_initialized = 1;
}
else
fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
}
- else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
- && !TARGET_RELOCATABLE)
- fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
+ else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ {
+ fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
+ if (!toc_initialized)
+ {
+ ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
+ toc_initialized = 1;
+ }
+ }
else
{
fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
if (!toc_initialized)
{
+ ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
fprintf (asm_out_file, " = .+32768\n");
toc_initialized = 1;
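[The ASM_OUTPUT_ALIGN calls added in this hunk supply the previously missing alignment before the TOC labels; the macro's second argument is a log2 byte count, so TARGET_64BIT ? 3 : 2 means an 8-byte or 4-byte boundary. A quick standalone check of that arithmetic:]

#include <stdio.h>

int
main (void)
{
  for (int log2 = 2; log2 <= 3; log2++)
    printf (".align %d -> %d-byte boundary\n", log2, 1 << log2);
  return 0;
}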
@@ -31938,7 +31966,8 @@ rs6000_elf_asm_out_constructor (rtx symbol, int priority)
switch_to_section (get_section (section, SECTION_WRITE, NULL));
assemble_align (POINTER_SIZE);
- if (TARGET_RELOCATABLE)
+ if (DEFAULT_ABI == ABI_V4
+ && (TARGET_RELOCATABLE || flag_pic > 1))
{
fputs ("\t.long (", asm_out_file);
output_addr_const (asm_out_file, symbol);
@@ -31968,7 +31997,8 @@ rs6000_elf_asm_out_destructor (rtx symbol, int priority)
switch_to_section (get_section (section, SECTION_WRITE, NULL));
assemble_align (POINTER_SIZE);
- if (TARGET_RELOCATABLE)
+ if (DEFAULT_ABI == ABI_V4
+ && (TARGET_RELOCATABLE || flag_pic > 1))
{
fputs ("\t.long (", asm_out_file);
output_addr_const (asm_out_file, symbol);
@@ -32010,7 +32040,8 @@ rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
return;
}
- if (TARGET_RELOCATABLE
+ if (DEFAULT_ABI == ABI_V4
+ && (TARGET_RELOCATABLE || flag_pic > 1)
&& !TARGET_SECURE_PLT
&& (get_pool_size () != 0 || crtl->profile)
&& uses_TOC ())
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 12fa7275cdc..9647106fbcd 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -636,18 +636,10 @@ extern int rs6000_vector_align[];
#define MASK_64BIT OPTION_MASK_64BIT
#endif
-#ifdef TARGET_RELOCATABLE
-#define MASK_RELOCATABLE OPTION_MASK_RELOCATABLE
-#endif
-
#ifdef TARGET_LITTLE_ENDIAN
#define MASK_LITTLE_ENDIAN OPTION_MASK_LITTLE_ENDIAN
#endif
-#ifdef TARGET_MINIMAL_TOC
-#define MASK_MINIMAL_TOC OPTION_MASK_MINIMAL_TOC
-#endif
-
#ifdef TARGET_REGNAMES
#define MASK_REGNAMES OPTION_MASK_REGNAMES
#endif
@@ -2058,7 +2050,10 @@ do { \
to allocate such a register (if necessary). */
#define RS6000_PIC_OFFSET_TABLE_REGNUM 30
-#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? RS6000_PIC_OFFSET_TABLE_REGNUM : INVALID_REGNUM)
+#define PIC_OFFSET_TABLE_REGNUM \
+ (TARGET_TOC ? TOC_REGISTER \
+ : flag_pic ? RS6000_PIC_OFFSET_TABLE_REGNUM \
+ : INVALID_REGNUM)
#define TOC_REGISTER (TARGET_MINIMAL_TOC ? RS6000_PIC_OFFSET_TABLE_REGNUM : 2)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 5566185076a..ed1989cd2a6 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -9499,12 +9499,8 @@
char buf[30];
extern int need_toc_init;
need_toc_init = 1;
-#ifdef TARGET_RELOCATABLE
ASM_GENERATE_INTERNAL_LABEL (buf, \"LCTOC\",
- !TARGET_MINIMAL_TOC || TARGET_RELOCATABLE);
-#else
- ASM_GENERATE_INTERNAL_LABEL (buf, \"LCTOC\", 1);
-#endif
+ !TARGET_ELF || !TARGET_MINIMAL_TOC);
if (TARGET_ELF)
strcat (buf, \"@toc\");
operands[1] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h
index cbf909722da..46d2b4bcdbc 100644
--- a/gcc/config/rs6000/sysv4.h
+++ b/gcc/config/rs6000/sysv4.h
@@ -40,10 +40,8 @@
#undef ASM_DEFAULT_SPEC
#define ASM_DEFAULT_SPEC "-mppc"
-#define TARGET_TOC ((rs6000_isa_flags & OPTION_MASK_64BIT) \
- || ((rs6000_isa_flags \
- & (OPTION_MASK_RELOCATABLE \
- | OPTION_MASK_MINIMAL_TOC)) \
+#define TARGET_TOC (TARGET_64BIT \
+ || (TARGET_MINIMAL_TOC \
&& flag_pic > 1) \
|| DEFAULT_ABI != ABI_V4)
@@ -192,16 +190,25 @@ do { \
error ("-msecure-plt not supported by your assembler"); \
} \
\
- /* Treat -fPIC the same as -mrelocatable. */ \
if (flag_pic > 1 && DEFAULT_ABI == ABI_V4) \
{ \
- rs6000_isa_flags |= OPTION_MASK_RELOCATABLE | OPTION_MASK_MINIMAL_TOC; \
+ /* Note: flag_pic should not change any option flags that would \
+      be invalid for, or would pessimise, -fno-PIC code.  LTO turns off \
+ flag_pic when linking/recompiling a fixed position executable. \
+ However, if the objects were originally compiled with -fPIC, \
+ then other target options forced on here by -fPIC are restored \
+ when recompiling those objects without -fPIC. In particular \
+ TARGET_RELOCATABLE must not be enabled here by flag_pic. */ \
+ rs6000_isa_flags |= OPTION_MASK_MINIMAL_TOC; \
TARGET_NO_FP_IN_TOC = 1; \
} \
\
- else if (TARGET_RELOCATABLE) \
- if (!flag_pic) \
- flag_pic = 2; \
+ if (TARGET_RELOCATABLE) \
+ { \
+ if (!flag_pic) \
+ flag_pic = 2; \
+ TARGET_NO_FP_IN_TOC = 1; \
+ } \
} while (0)
#ifndef RS6000_BI_ARCH
@@ -317,8 +324,7 @@ do { \
/* Put PC relative got entries in .got2. */
#define MINIMAL_TOC_SECTION_ASM_OP \
- (TARGET_RELOCATABLE || (flag_pic && DEFAULT_ABI == ABI_V4) \
- ? "\t.section\t\".got2\",\"aw\"" : "\t.section\t\".got1\",\"aw\"")
+ (flag_pic ? "\t.section\t\".got2\",\"aw\"" : "\t.section\t\".got1\",\"aw\"")
#define SDATA_SECTION_ASM_OP "\t.section\t\".sdata\",\"aw\""
#define SDATA2_SECTION_ASM_OP "\t.section\t\".sdata2\",\"a\""
@@ -352,7 +358,6 @@ do { \
|| (GET_CODE (X) == CONST_INT \
&& GET_MODE_BITSIZE (MODE) <= GET_MODE_BITSIZE (Pmode)) \
|| (!TARGET_NO_FP_IN_TOC \
- && !TARGET_RELOCATABLE \
&& GET_CODE (X) == CONST_DOUBLE \
&& SCALAR_FLOAT_MODE_P (GET_MODE (X)) \
&& BITS_PER_WORD == HOST_BITS_PER_INT)))
@@ -941,9 +946,10 @@ ncrtn.o%s"
/* Select a format to encode pointers in exception handling data. CODE
is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
true if the symbol may be affected by dynamic relocations. */
-#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
- ((flag_pic || TARGET_RELOCATABLE) \
- ? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4) \
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ (flag_pic \
+ ? (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel \
+ | DW_EH_PE_sdata4) \
: DW_EH_PE_absptr)
#define DOUBLE_INT_ASM_OP "\t.quad\t"
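[The values OR'd together above are the standard DWARF EH pointer-encoding constants; after this hunk the chosen byte depends only on flag_pic. A standalone computation of the PIC case, with the constants spelled out at their standard DWARF values:]

#include <stdio.h>

#define DW_EH_PE_absptr   0x00
#define DW_EH_PE_pcrel    0x10
#define DW_EH_PE_sdata4   0x0b
#define DW_EH_PE_indirect 0x80

int
main (void)
{
  int global = 1, flag_pic = 1;
  int enc = flag_pic
	    ? ((global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel
	       | DW_EH_PE_sdata4)
	    : DW_EH_PE_absptr;
  /* Prints 0x9b: indirect, pc-relative, signed 4-byte datum.  */
  printf ("0x%02x\n", enc);
  return 0;
}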
diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md
index 3e69d88f64b..4de90af9abb 100644
--- a/gcc/config/sh/predicates.md
+++ b/gcc/config/sh/predicates.md
@@ -230,6 +230,24 @@
(match_test "sh_disp_addr_displacement (op)
<= sh_max_mov_insn_displacement (GET_MODE (op), false)")))
+;; Returns true if OP is a displacement address that does not fit into
+;; a 16-bit (non-SH2A) memory load / store insn.
+(define_predicate "long_displacement_mem_operand"
+ (and (match_operand 0 "displacement_mem_operand")
+ (not (match_operand 0 "short_displacement_mem_operand"))))
+
+;; Returns true if OP is a post-increment addressing mode memory reference.
+(define_predicate "post_inc_mem"
+ (and (match_code "mem")
+ (match_code "post_inc" "0")
+ (match_code "reg" "00")))
+
+;; Returns true if OP is a pre-decrement addressing mode memory reference.
+(define_predicate "pre_dec_mem"
+ (and (match_code "mem")
+ (match_code "pre_dec" "0")
+ (match_code "reg" "00")))
+
;; Returns 1 if the operand can be used in an SH2A movu.{b|w} insn.
(define_predicate "zero_extend_movu_operand"
(and (ior (match_operand 0 "displacement_mem_operand")
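[For reference, the digit strings in the new predicates index subexpressions: "0" means XEXP (op, 0) and "00" means XEXP (XEXP (op, 0), 0), so post_inc_mem accepts e.g. (mem:SI (post_inc:SI (reg:SI r4))). Roughly the C test genpreds would generate for it; a sketch, not the actual generated insn-preds.c code:]

static inline bool
post_inc_mem_p (rtx op)
{
  return MEM_P (op)
	 && GET_CODE (XEXP (op, 0)) == POST_INC
	 && REG_P (XEXP (XEXP (op, 0), 0));
}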
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index ea7e847300d..fecbb886d0f 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -224,8 +224,12 @@ sh_find_set_of_reg (rtx reg, rtx_insn* insn, F stepfunc,
}
}
- if (result.set_src != NULL)
- gcc_assert (result.insn != NULL && result.set_rtx != NULL);
+  /* If the reg being searched for is found inside a
+     (mem (post_inc:SI (reg))), set_of will return NULL and set_rtx
+     will be NULL.  In this case report 'not found'.  result.insn will
+     always be non-null at this point, so there is no need to check it.  */
+ if (result.set_src != NULL && result.set_rtx == NULL)
+ result.set_src = NULL;
return result;
}
@@ -344,13 +348,24 @@ private:
extern sh_treg_insns sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn);
+enum
+{
+ /* An effective conditional branch distance of zero bytes is impossible.
+ Hence we can use it to designate an unknown value. */
+ unknown_cbranch_distance = 0u,
+ infinite_cbranch_distance = ~0u
+};
+
+unsigned int
+sh_cbranch_distance (rtx_insn* cbranch_insn,
+ unsigned int max_dist = infinite_cbranch_distance);
+
#endif /* RTX_CODE */
extern void sh_cpu_cpp_builtins (cpp_reader* pfile);
extern const char *output_jump_label_table (void);
extern rtx get_t_reg_rtx (void);
-extern int sh_media_register_for_return (void);
extern void sh_expand_prologue (void);
extern void sh_expand_epilogue (bool);
extern void sh_set_return_address (rtx, rtx);
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index ebdb523cd17..a36b098cf40 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -80,8 +80,9 @@ int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
? (DECL_ATTRIBUTES (decl)) \
: TYPE_ATTRIBUTES (TREE_TYPE (decl))
-/* Set to 1 by expand_prologue() when the function is an interrupt handler. */
-int current_function_interrupt;
+/* Set to true by expand_prologue() when the function is an
+ interrupt handler. */
+bool current_function_interrupt;
tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
@@ -180,10 +181,10 @@ static void sh_reorg (void);
static void sh_option_override (void);
static void sh_override_options_after_change (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
-static rtx_insn *frame_insn (rtx);
+static rtx_insn* emit_frame_insn (rtx);
static rtx push (int);
static void pop (int);
-static void push_regs (HARD_REG_SET *, int);
+static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static bool sh_frame_pointer_required (void);
@@ -267,7 +268,6 @@ static rtx sh_delegitimize_address (rtx);
static bool sh_cannot_substitute_mem_equiv_p (rtx);
static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
static int scavenge_reg (HARD_REG_SET *s);
-struct save_schedule_s;
static rtx sh_struct_value_rtx (tree, int);
static rtx sh_function_value (const_tree, const_tree, bool);
@@ -355,12 +355,6 @@ static const struct attribute_spec sh_attribute_table[] =
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
-/* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
-#undef TARGET_ASM_UNALIGNED_DI_OP
-#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
-#undef TARGET_ASM_ALIGNED_DI_OP
-#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
-
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sh_option_override
@@ -832,10 +826,6 @@ sh_option_override (void)
sh_cpu = PROCESSOR_SH4A;
}
- /* Only the sh64-elf assembler fully supports .quad properly. */
- targetm.asm_out.aligned_op.di = NULL;
- targetm.asm_out.unaligned_op.di = NULL;
-
  /* User/privileged mode is supported only on SH3* and SH4*.
Disable it for everything else. */
if (!TARGET_SH3 && TARGET_USERMODE)
@@ -1662,11 +1652,9 @@ prepare_move_operands (rtx operands[], machine_mode mode)
if (mode == Pmode || mode == ptr_mode)
{
- rtx op0, op1, opc;
- enum tls_model tls_kind;
-
- op0 = operands[0];
- op1 = operands[1];
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx opc;
if (GET_CODE (op1) == CONST
&& GET_CODE (XEXP (op1, 0)) == PLUS
&& (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
@@ -1678,6 +1666,8 @@ prepare_move_operands (rtx operands[], machine_mode mode)
else
opc = NULL_RTX;
+ enum tls_model tls_kind;
+
if (! reload_in_progress && ! reload_completed
&& (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
{
@@ -1698,7 +1688,7 @@ prepare_move_operands (rtx operands[], machine_mode mode)
emit_use (gen_rtx_REG (SImode, PIC_REG));
if (flag_schedule_insns)
emit_insn (gen_blockage ());
- }
+ }
switch (tls_kind)
{
@@ -1928,6 +1918,52 @@ sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
return true;
}
+/* Try to calculate the branch distance of a conditional branch in bytes.
+
+ FIXME: Because of PR 59189 we can't use the CFG here. Instead just
+ walk from this insn into the next (fall-through) basic block and see if
+ we hit the label. */
+unsigned int
+sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
+{
+ rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "sh_cbranch_distance insn = \n");
+ print_rtl_single (dump_file, cbranch_insn);
+ }
+
+ unsigned int dist = 0;
+
+ for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
+ i != NULL && dist < max_dist; i = next_nonnote_insn (i))
+ {
+ const unsigned int i_len = get_attr_length (i);
+ dist += i_len;
+
+ if (dump_file)
+ fprintf (dump_file, " insn %d length = %u dist = %u\n",
+ INSN_UID (i), i_len, dist);
+
+ if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
+ {
+ if (l == cbranch_insn->jump_target ())
+ {
+ if (dump_file)
+ fprintf (dump_file, " cbranch dist = %u\n", dist);
+ return dist;
+ }
+ break;
+ }
+ }
+
+ if (dump_file)
+ fprintf (dump_file, " cbranch dist = unknown\n");
+
+ return unknown_cbranch_distance;
+}
+
enum rtx_code
prepare_cbranch_operands (rtx *operands, machine_mode mode,
enum rtx_code comparison)
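[A hypothetical caller of the new sh_cbranch_distance, showing how the max_dist bound and the unknown_cbranch_distance sentinel from sh-protos.h are meant to combine; a usage sketch, not code from the patch:]

static bool
cbranch_within (rtx_insn *cbranch, unsigned int limit)
{
  /* Walks forward at most LIMIT bytes; 0 (unknown_cbranch_distance)
     means the target label was not found in range.  */
  unsigned int d = sh_cbranch_distance (cbranch, limit);
  return d != unknown_cbranch_distance && d <= limit;
}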
@@ -2210,7 +2246,6 @@ sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
{
rtx t_reg = get_t_reg_rtx ();
enum rtx_code oldcode = code;
- machine_mode mode;
/* First need a compare insn. */
switch (code)
@@ -2236,7 +2271,7 @@ sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
if (code != oldcode)
std::swap (op0, op1);
- mode = GET_MODE (op0);
+ machine_mode mode = GET_MODE (op0);
if (mode == VOIDmode)
mode = GET_MODE (op1);
@@ -2779,14 +2814,13 @@ static bool
unspec_caller_rtx_p (rtx pat)
{
rtx base, offset;
- int i;
-
split_const (pat, &base, &offset);
+
if (GET_CODE (base) == UNSPEC)
{
if (XINT (base, 1) == UNSPEC_CALLER)
return true;
- for (i = 0; i < XVECLEN (base, 0); i++)
+ for (int i = 0; i < XVECLEN (base, 0); i++)
if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
return true;
}
@@ -2798,8 +2832,6 @@ unspec_caller_rtx_p (rtx pat)
static bool
sh_cannot_copy_insn_p (rtx_insn *insn)
{
- rtx pat;
-
if (!reload_completed || !flag_pic)
return false;
@@ -2808,7 +2840,7 @@ sh_cannot_copy_insn_p (rtx_insn *insn)
if (asm_noperands (insn) >= 0)
return false;
- pat = PATTERN (insn);
+ rtx pat = PATTERN (insn);
if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
return false;
@@ -3209,6 +3241,15 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
*total = 1; //COSTS_N_INSNS (1);
return true;
}
+
+ /* div0s variant. */
+ if (GET_CODE (XEXP (x, 0)) == XOR
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+ {
+ *total = 1;
+ return true;
+ }
return false;
/* The cost of a sign or zero extend depends on whether the source is a
@@ -4424,12 +4465,11 @@ static int max_labelno_before_reorg;
static rtx_code_label *
add_constant (rtx x, machine_mode mode, rtx last_value)
{
- int i;
rtx_code_label *lab, *new_rtx;
label_ref_list_t ref, newref;
/* First see if we've already got it. */
- for (i = 0; i < pool_size; i++)
+ for (int i = 0; i < pool_size; i++)
{
if (x->code == pool_vector[i].value->code
&& mode == pool_vector[i].mode)
@@ -4503,7 +4543,6 @@ static void
dump_table (rtx_insn *start, rtx_insn *barrier)
{
rtx_insn *scan = barrier;
- int i;
bool need_align = true;
rtx lab;
label_ref_list_t ref;
@@ -4511,7 +4550,7 @@ dump_table (rtx_insn *start, rtx_insn *barrier)
/* Do two passes, first time dump out the HI sized constants. */
- for (i = 0; i < pool_size; i++)
+ for (int i = 0; i < pool_size; i++)
{
pool_node *p = &pool_vector[i];
@@ -4560,7 +4599,7 @@ dump_table (rtx_insn *start, rtx_insn *barrier)
scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
need_align = false;
- for (i = 0; i < pool_size; i++)
+ for (int i = 0; i < pool_size; i++)
{
pool_node *p = &pool_vector[i];
@@ -4626,7 +4665,7 @@ dump_table (rtx_insn *start, rtx_insn *barrier)
pool_size = 0;
}
- for (i = 0; i < pool_size; i++)
+ for (int i = 0; i < pool_size; i++)
{
pool_node *p = &pool_vector[i];
@@ -5195,7 +5234,7 @@ sfunc_uses_reg (rtx_insn *insn)
if (! reg_part)
return NULL_RTX;
reg = XEXP (reg_part, 0);
- for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
+ for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
{
part = XVECEXP (pattern, 0, i);
if (part == reg_part || GET_CODE (part) == CLOBBER)
@@ -5214,14 +5253,12 @@ sfunc_uses_reg (rtx_insn *insn)
static bool
noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
{
- rtx pattern, reg2;
-
*set = NULL_RTX;
- reg2 = sfunc_uses_reg (insn);
+ rtx reg2 = sfunc_uses_reg (insn);
if (reg2 && REGNO (reg2) == REGNO (reg))
{
- pattern = single_set (insn);
+ rtx pattern = single_set (insn);
if (pattern
&& REG_P (SET_DEST (pattern))
&& REGNO (reg) == REGNO (SET_DEST (pattern)))
@@ -5232,7 +5269,7 @@ noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
{
/* We don't use rtx_equal_p because we don't care if the mode is
different. */
- pattern = single_set (insn);
+ rtx pattern = single_set (insn);
if (pattern
&& REG_P (SET_DEST (pattern))
&& REGNO (reg) == REGNO (SET_DEST (pattern)))
@@ -5255,13 +5292,11 @@ noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
return true;
}
- pattern = PATTERN (insn);
+ rtx pattern = PATTERN (insn);
if (GET_CODE (pattern) == PARALLEL)
{
- int i;
-
- for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
+ for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
return true;
pattern = XVECEXP (pattern, 0, 0);
@@ -5301,7 +5336,7 @@ regs_used (rtx x, int is_dest)
{
enum rtx_code code;
const char *fmt;
- int i, used = 0;
+ int used = 0;
if (! x)
return used;
@@ -5348,12 +5383,11 @@ regs_used (rtx x, int is_dest)
fmt = GET_RTX_FORMAT (code);
- for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
{
if (fmt[i] == 'E')
{
- int j;
- for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
used |= regs_used (XVECEXP (x, i, j), is_dest);
}
else if (fmt[i] == 'e')
@@ -5376,7 +5410,6 @@ gen_block_redirect (rtx_insn *jump, int addr, int need_block)
{
int dead = 0;
rtx_insn *prev = prev_nonnote_insn (jump);
- rtx dest;
/* First, check if we already have an instruction that satisfies our need. */
if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
@@ -5402,7 +5435,7 @@ gen_block_redirect (rtx_insn *jump, int addr, int need_block)
}
/* We can't use JUMP_LABEL here because it might be undefined
when not optimizing. */
- dest = XEXP (SET_SRC (PATTERN (jump)), 0);
+ rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
/* If the branch is out of range, try to find a scratch register for it. */
if (optimize
&& (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
@@ -5420,11 +5453,9 @@ gen_block_redirect (rtx_insn *jump, int addr, int need_block)
for (scan = jump; (scan = PREV_INSN (scan)); )
{
- enum rtx_code code;
-
if (scan->deleted ())
continue;
- code = GET_CODE (scan);
+ rtx_code code = GET_CODE (scan);
if (code == CODE_LABEL || code == JUMP_INSN)
break;
if (code == INSN
@@ -5439,11 +5470,9 @@ gen_block_redirect (rtx_insn *jump, int addr, int need_block)
for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
(scan = NEXT_INSN (scan)); )
{
- enum rtx_code code;
-
if (scan->deleted ())
continue;
- code = GET_CODE (scan);
+ rtx_code code = GET_CODE (scan);
if (INSN_P (scan))
{
used |= regs_used (PATTERN (scan), 0);
@@ -5539,15 +5568,14 @@ struct far_branch
int address;
};
-static void gen_far_branch (struct far_branch *);
enum mdep_reorg_phase_e mdep_reorg_phase;
+
static void
gen_far_branch (struct far_branch *bp)
{
rtx_insn *insn = bp->insert_place;
rtx_jump_insn *jump;
rtx_code_label *label = gen_label_rtx ();
- int ok;
emit_label_after (label, insn);
if (bp->far_label)
@@ -5576,7 +5604,7 @@ gen_far_branch (struct far_branch *bp)
JUMP_LABEL (jump) = pat;
}
- ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
+ bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
gcc_assert (ok);
/* If we are branching around a jump (rather than a return), prevent
@@ -5646,8 +5674,6 @@ fixup_addr_diff_vecs (rtx_insn *first)
int
barrier_align (rtx_insn *barrier_or_label)
{
- rtx next, pat;
-
if (! barrier_or_label)
return 0;
@@ -5660,7 +5686,7 @@ barrier_align (rtx_insn *barrier_or_label)
&& PREV_INSN (barrier_or_label)
&& JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
{
- pat = PATTERN (PREV_INSN (barrier_or_label));
+ rtx pat = PATTERN (PREV_INSN (barrier_or_label));
/* If this is a very small table, we want to keep the alignment after
the table to the minimum for proper code alignment. */
return ((optimize_size
@@ -5669,12 +5695,12 @@ barrier_align (rtx_insn *barrier_or_label)
? 1 : align_jumps_log);
}
- next = next_active_insn (barrier_or_label);
+ rtx next = next_active_insn (barrier_or_label);
if (! next)
return 0;
- pat = PATTERN (next);
+ rtx pat = PATTERN (next);
if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
/* This is a barrier in front of a constant table. */
@@ -6242,11 +6268,11 @@ int
get_dest_uid (rtx label, int max_uid)
{
rtx_insn *dest = next_real_insn (label);
- int dest_uid;
+
if (! dest)
/* This can happen for an undefined label. */
return 0;
- dest_uid = INSN_UID (dest);
+ int dest_uid = INSN_UID (dest);
/* If this is a newly created branch redirection blocking instruction,
we cannot index the branch_uid or insn_addresses arrays with its
uid. But then, we won't need to, because the actual destination is
@@ -6505,14 +6531,9 @@ final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
if (TARGET_RELAX)
{
- rtx note;
-
- note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
- if (note)
+ if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
{
- rtx pattern;
-
- pattern = PATTERN (insn);
+ rtx pattern = PATTERN (insn);
if (GET_CODE (pattern) == PARALLEL)
pattern = XVECEXP (pattern, 0, 0);
switch (GET_CODE (pattern))
@@ -6543,12 +6564,10 @@ final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
const char *
output_jump_label_table (void)
{
- int i;
-
if (pool_size)
{
fprintf (asm_out_file, "\t.align 2\n");
- for (i = 0; i < pool_size; i++)
+ for (int i = 0; i < pool_size; i++)
{
pool_node *p = &pool_vector[i];
@@ -6593,7 +6612,7 @@ static void
output_stack_adjust (int size, rtx reg, int epilogue_p,
HARD_REG_SET *live_regs_mask, bool frame_p)
{
- rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
+ rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
if (size)
{
HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
@@ -6743,10 +6762,9 @@ output_stack_adjust (int size, rtx reg, int epilogue_p,
}
}
-/* Emit the specified insn and mark it as frame related.
- FIXME: Rename this to emit_frame_insn. */
+/* Emit the specified insn and mark it as frame related. */
static rtx_insn *
-frame_insn (rtx x)
+emit_frame_insn (rtx x)
{
rtx_insn *insn = emit_insn (x);
RTX_FRAME_RELATED_P (insn) = 1;
@@ -6774,7 +6792,7 @@ push (int rn)
else
x = gen_push (gen_rtx_REG (SImode, rn));
- x = frame_insn (x);
+ x = emit_frame_insn (x);
add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
return x;
}
@@ -6817,15 +6835,15 @@ pop (int rn)
/* Generate code to push the regs specified in the mask. */
static void
-push_regs (HARD_REG_SET *mask, int interrupt_handler)
+push_regs (HARD_REG_SET *mask, bool interrupt_handler)
{
- int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
- int skip_fpscr = 0;
+ bool skip_fpscr = false;
/* Push PR last; this gives better latencies after the prologue, and
candidates for the return delay slot when there are no general
registers pushed. */
- for (; i < FIRST_PSEUDO_REGISTER; i++)
+ for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
+ i < FIRST_PSEUDO_REGISTER; i++)
{
/* If this is an interrupt handler, and the SZ bit varies,
and we have to push any floating point register, we need
@@ -6838,7 +6856,7 @@ push_regs (HARD_REG_SET *mask, int interrupt_handler)
push (FPSCR_REG);
COMPL_HARD_REG_SET (unsaved, *mask);
fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
- skip_fpscr = 1;
+ skip_fpscr = true;
}
if (i != PR_REG
&& (i != FPSCR_REG || ! skip_fpscr)
@@ -6864,7 +6882,7 @@ push_regs (HARD_REG_SET *mask, int interrupt_handler)
{
unsigned int count = 0;
- for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
if (TEST_HARD_REG_BIT (*mask, i))
count++;
else
@@ -6886,8 +6904,8 @@ push_regs (HARD_REG_SET *mask, int interrupt_handler)
insns. */
emit_insn (gen_blockage ());
x = gen_movml_push_banked (sp_reg);
- x = frame_insn (x);
- for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ x = emit_frame_insn (x);
+ for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
{
mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
reg = gen_rtx_REG (SImode, i);
@@ -6899,7 +6917,7 @@ push_regs (HARD_REG_SET *mask, int interrupt_handler)
emit_insn (gen_blockage ());
}
else
- for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
if (TEST_HARD_REG_BIT (*mask, i))
push (i);
}
@@ -6919,11 +6937,9 @@ static int
calc_live_regs (HARD_REG_SET *live_regs_mask)
{
unsigned int reg;
- int count;
tree attrs;
bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
bool nosave_low_regs;
- int pr_live, has_call;
attrs = DECL_ATTRIBUTES (current_function_decl);
interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
@@ -6937,7 +6953,7 @@ calc_live_regs (HARD_REG_SET *live_regs_mask)
target_flags &= ~MASK_FPU_SINGLE;
/* If we can save a lot of saves by switching to double mode, do that. */
else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
- for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
+ for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
&& (! call_really_used_regs[reg]
|| interrupt_handler)
@@ -6947,20 +6963,22 @@ calc_live_regs (HARD_REG_SET *live_regs_mask)
break;
}
- {
- rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
- pr_live = (pr_initial
+
+ rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
+ bool pr_live = (pr_initial
? (!REG_P (pr_initial)
|| REGNO (pr_initial) != (PR_REG))
: df_regs_ever_live_p (PR_REG));
- /* For Shcompact, if not optimizing, we end up with a memory reference
- using the return address pointer for __builtin_return_address even
- though there is no actual need to put the PR register on the stack. */
- pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
- }
+ /* For Shcompact, if not optimizing, we end up with a memory reference
+ using the return address pointer for __builtin_return_address even
+ though there is no actual need to put the PR register on the stack. */
+ pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
+
/* Force PR to be live if the prologue has to call the SHmedia
argument decoder or register saver. */
- has_call = pr_live;
+ bool has_call = pr_live;
+
+ int count;
for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
{
if (reg == PR_REG
@@ -7064,68 +7082,11 @@ rounded_frame_size (int pushed)
return ((size + pushed + align - 1) & -align) - pushed;
}
-/* Choose a call-clobbered target-branch register that remains
- unchanged along the whole function. We set it up as the return
- value in the prologue. */
-int
-sh_media_register_for_return (void)
-{
- int regno;
- int tr0_used;
-
- if (! crtl->is_leaf)
- return -1;
- if (lookup_attribute ("interrupt_handler",
- DECL_ATTRIBUTES (current_function_decl)))
- return -1;
- if (sh_cfun_interrupt_handler_p ())
- return -1;
-
- tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
-
- for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
- if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
- return regno;
-
- return -1;
-}
-
-/* The maximum registers we need to save are:
- - 62 general purpose registers (r15 is stack pointer, r63 is zero)
- - 32 floating point registers (for each pair, we save none,
- one single precision value, or a double precision value).
- - 8 target registers
- - add 1 entry for a delimiter. */
-#define MAX_SAVED_REGS (62+32+8)
-
-typedef struct save_entry_s
-{
- unsigned char reg;
- unsigned char mode;
- short offset;
-} save_entry;
-
-#define MAX_TEMPS 4
-
-/* There will be a delimiter entry with VOIDmode both at the start and the
- end of a filled in schedule. The end delimiter has the offset of the
- save with the smallest (i.e. most negative) offset. */
-typedef struct save_schedule_s
-{
- save_entry entries[MAX_SAVED_REGS + 2];
- int temps[MAX_TEMPS+1];
-} save_schedule;
-
/* Expand code for the function prologue. */
void
sh_expand_prologue (void)
{
- HARD_REG_SET live_regs_mask;
- int d, i;
- int d_rounding = 0;
int save_flags = target_flags;
- int pretend_args;
- int stack_usage;
tree sp_switch_attr
= lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
@@ -7133,16 +7094,14 @@ sh_expand_prologue (void)
/* We have pretend args if we had an object sent partially in registers
and partially on the stack, e.g. a large structure. */
- pretend_args = crtl->args.pretend_args_size;
+ int pretend_args = crtl->args.pretend_args_size;
if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
&& (NPARM_REGS(SImode)
> crtl->args.info.arg_count[(int) SH_ARG_INT]))
pretend_args = 0;
- output_stack_adjust (-pretend_args
- - crtl->args.info.stack_regs * 8,
- stack_pointer_rtx, 0, NULL, true);
- stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
+ output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
+ int stack_usage = pretend_args;
/* Emit the code for SETUP_VARARGS. */
if (cfun->stdarg)
@@ -7150,7 +7109,7 @@ sh_expand_prologue (void)
if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
{
/* Push arg regs as if they'd been provided by caller in stack. */
- for (i = 0; i < NPARM_REGS(SImode); i++)
+ for (int i = 0; i < NPARM_REGS(SImode); i++)
{
int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
@@ -7171,8 +7130,7 @@ sh_expand_prologue (void)
/* The argument specifies a variable holding the address of the
stack the interrupt function should switch to/from at entry/exit. */
tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
- const char *s
- = ggc_strdup (TREE_STRING_POINTER (arg));
+ const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
lab = add_constant (sp_switch, SImode, 0);
@@ -7181,7 +7139,8 @@ sh_expand_prologue (void)
emit_insn (gen_sp_switch_1 (newsrc));
}
- d = calc_live_regs (&live_regs_mask);
+ HARD_REG_SET live_regs_mask;
+ int d = calc_live_regs (&live_regs_mask);
/* ??? Maybe we could save some switching if we can move a mode switch
that already happens to be at the function start into the prologue. */
if (target_flags != save_flags && ! current_function_interrupt)
@@ -7199,12 +7158,12 @@ sh_expand_prologue (void)
target_flags = save_flags;
- output_stack_adjust (-rounded_frame_size (d) + d_rounding,
+ output_stack_adjust (-rounded_frame_size (d),
stack_pointer_rtx, 0, NULL, true);
- stack_usage += rounded_frame_size (d) - d_rounding;
+ stack_usage += rounded_frame_size (d);
if (frame_pointer_needed)
- frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
+ emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
/* If we are profiling, make sure no instructions are scheduled before
the call to mcount. Similarly if some call instructions are swapped
@@ -7221,19 +7180,15 @@ sh_expand_prologue (void)
void
sh_expand_epilogue (bool sibcall_p)
{
- HARD_REG_SET live_regs_mask;
- int d, i;
- int d_rounding = 0;
-
int save_flags = target_flags;
- int frame_size, save_size;
- int fpscr_deferred = 0;
+ bool fpscr_deferred = false;
int e = sibcall_p ? -1 : 1;
- d = calc_live_regs (&live_regs_mask);
+ HARD_REG_SET live_regs_mask;
+ int d = calc_live_regs (&live_regs_mask);
- save_size = d;
- frame_size = rounded_frame_size (d);
+ int save_size = d;
+ int frame_size = rounded_frame_size (d);
if (frame_pointer_needed)
{
@@ -7248,7 +7203,7 @@ sh_expand_epilogue (bool sibcall_p)
occur after the SP adjustment and clobber data in the local
frame. */
emit_insn (gen_blockage ());
- frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
+ emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
}
else if (frame_size)
{
@@ -7290,7 +7245,7 @@ sh_expand_epilogue (bool sibcall_p)
{
unsigned int count = 0;
- for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
if (TEST_HARD_REG_BIT (live_regs_mask, i))
count++;
else
@@ -7314,7 +7269,7 @@ sh_expand_epilogue (bool sibcall_p)
emit_insn (gen_blockage ());
}
else
- for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
+ for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
if (TEST_HARD_REG_BIT (live_regs_mask, i))
pop (i);
@@ -7323,14 +7278,14 @@ sh_expand_epilogue (bool sibcall_p)
else
last_reg = FIRST_PSEUDO_REGISTER;
- for (i = 0; i < last_reg; i++)
+ for (int i = 0; i < last_reg; i++)
{
int j = (FIRST_PSEUDO_REGISTER - 1) - i;
if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
&& hard_reg_set_intersect_p (live_regs_mask,
reg_class_contents[DF_REGS]))
- fpscr_deferred = 1;
+ fpscr_deferred = true;
/* For an ISR with RESBANK attribute assigned, don't pop
following registers, R0-R14, MACH, MACL and GBR. */
else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
@@ -7350,9 +7305,7 @@ sh_expand_epilogue (bool sibcall_p)
emit_insn (gen_toggle_sz ());
target_flags = save_flags;
- output_stack_adjust (crtl->args.pretend_args_size
- + save_size + d_rounding
- + crtl->args.info.stack_regs * 8,
+ output_stack_adjust (crtl->args.pretend_args_size + save_size,
stack_pointer_rtx, e, NULL, true);
if (crtl->calls_eh_return)
@@ -7379,8 +7332,7 @@ sh_set_return_address (rtx ra, rtx tmp)
HARD_REG_SET live_regs_mask;
int d = calc_live_regs (&live_regs_mask);
- /* If pr_reg isn't life, we can set it (or the register given in
- sh_media_register_for_return) directly. */
+  /* If pr_reg isn't live, we can set it directly.  */
if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
{
rtx rr = gen_rtx_REG (SImode, PR_REG);
@@ -7427,7 +7379,7 @@ sh_builtin_saveregs (void)
int bufsize, regno;
alias_set_type alias_set;
- if (! TARGET_SH2E && ! TARGET_SH4)
+ if (!TARGET_FPU_ANY)
{
error ("__builtin_saveregs not supported by this subtarget");
return const0_rtx;
@@ -7668,30 +7620,26 @@ static tree
sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
gimple_seq *post_p ATTRIBUTE_UNUSED)
{
- HOST_WIDE_INT size, rsize;
- tree tmp, pptr_type_node;
+ tree tmp;
tree addr, lab_over = NULL, result = NULL;
- bool pass_by_ref;
tree eff_type;
- if (!VOID_TYPE_P (type))
- pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
- else
- pass_by_ref = false;
+  const bool pass_by_ref
+    = !VOID_TYPE_P (type)
+      && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
if (pass_by_ref)
type = build_pointer_type (type);
- size = int_size_in_bytes (type);
- rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
- pptr_type_node = build_pointer_type (ptr_type_node);
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
+ tree pptr_type_node = build_pointer_type (ptr_type_node);
if ((TARGET_SH2E || TARGET_SH4)
&& ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
{
tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
- int pass_as_float;
tree lab_false;
tree member;
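[The rsize line above uses the usual power-of-two round-up idiom: (size + N - 1) & -N rounds size up to a multiple of N. A standalone check, assuming a 4-byte UNITS_PER_WORD as on 32-bit SH:]

#include <stdio.h>

int
main (void)
{
  const int units_per_word = 4;
  for (int size = 1; size <= 9; size += 4)
    printf ("%d -> %d\n", size,
	    (size + units_per_word - 1) & -units_per_word);
  /* prints 1 -> 4, 5 -> 8, 9 -> 12 */
  return 0;
}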
@@ -7736,6 +7684,7 @@ sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
}
}
+ bool pass_as_float;
if (TARGET_FPU_DOUBLE)
{
pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
@@ -7949,6 +7898,20 @@ sh_callee_copies (cumulative_args_t cum, machine_mode mode,
% SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
}
+static sh_arg_class
+get_sh_arg_class (machine_mode mode)
+{
+ if (TARGET_FPU_ANY && mode == SFmode)
+ return SH_ARG_FLOAT;
+
+ if (TARGET_FPU_DOUBLE
+ && (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
+ return SH_ARG_FLOAT;
+
+ return SH_ARG_INT;
+}
+
/* Round a register number up to a proper boundary for an arg of mode
MODE.
The SH doesn't care about double alignment, so we only
@@ -7964,9 +7927,9 @@ sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
&& (mode == DFmode || mode == DCmode)
&& cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
&& GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
- ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
- + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
- : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
+ ? (cum.arg_count[(int) get_sh_arg_class (mode)]
+ + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
+ : cum.arg_count[(int) get_sh_arg_class (mode)]);
}
/* Return true if arg of the specified mode should be passed in a register
@@ -8094,7 +8057,7 @@ sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
if (ca->force_mem)
- ca->force_mem = 0;
+ ca->force_mem = false;
if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
{
@@ -8118,7 +8081,7 @@ sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
|| sh_pass_in_reg_p (*ca, mode, type))
- (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
+ (ca->arg_count[(int) get_sh_arg_class (mode)]
= (sh_round_reg (*ca, mode)
+ (mode == BLKmode
? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
@@ -8235,27 +8198,22 @@ sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
int
initial_elimination_offset (int from, int to)
{
- int regs_saved;
- int regs_saved_rounding = 0;
- int total_saved_regs_space;
- int total_auto_space;
+ const int regs_saved_rounding = 0;
int save_flags = target_flags;
HARD_REG_SET live_regs_mask;
- regs_saved = calc_live_regs (&live_regs_mask);
+ int regs_saved = calc_live_regs (&live_regs_mask);
- total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
+ int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
target_flags = save_flags;
- total_saved_regs_space = regs_saved + regs_saved_rounding;
+ int total_saved_regs_space = regs_saved + regs_saved_rounding;
if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
- return total_saved_regs_space + total_auto_space
- + crtl->args.info.byref_regs * 8;
+ return total_saved_regs_space + total_auto_space;
if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
- return total_saved_regs_space + total_auto_space
- + crtl->args.info.byref_regs * 8;
+ return total_saved_regs_space + total_auto_space;
/* Initial gap between fp and sp is 0. */
if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
@@ -8277,39 +8235,34 @@ initial_elimination_offset (int from, int to)
void
sh_fix_range (const char *const_str)
{
- int i, first, last;
- char *str, *dash, *comma;
-
  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
REG2 are either register names or register numbers. The effect
of this option is to mark the registers in the range from REG1 to
REG2 as ``fixed'' so they won't be used by the compiler. */
- i = strlen (const_str);
- str = (char *) alloca (i + 1);
- memcpy (str, const_str, i + 1);
+ char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
while (1)
{
- dash = strchr (str, '-');
+ char* dash = strchr (str, '-');
if (!dash)
{
warning (0, "value of -mfixed-range must have form REG1-REG2");
return;
}
*dash = '\0';
- comma = strchr (dash + 1, ',');
+ char* comma = strchr (dash + 1, ',');
if (comma)
*comma = '\0';
- first = decode_reg_name (str);
+ int first = decode_reg_name (str);
if (first < 0)
{
warning (0, "unknown register name: %s", str);
return;
}
- last = decode_reg_name (dash + 1);
+ int last = decode_reg_name (dash + 1);
if (last < 0)
{
warning (0, "unknown register name: %s", dash + 1);
@@ -8324,7 +8277,7 @@ sh_fix_range (const char *const_str)
return;
}
- for (i = first; i <= last; ++i)
+ for (int i = first; i <= last; ++i)
fixed_regs[i] = call_used_regs[i] = 1;
if (!comma)
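[A standalone re-creation of the -mfixed-range syntax the loop above parses; decode_reg here is a stand-in that only accepts "rN" names, unlike the real decode_reg_name:]

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
decode_reg (const char *s)
{
  return (s[0] == 'r') ? atoi (s + 1) : -1;
}

int
main (void)
{
  char str[] = "r4-r7,r10-r12";
  for (char *p = str; p; )
    {
      char *dash = strchr (p, '-');
      if (!dash)
	{
	  fprintf (stderr, "expected REG1-REG2\n");
	  return 1;
	}
      *dash = '\0';
      char *comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';
      int first = decode_reg (p), last = decode_reg (dash + 1);
      for (int i = first; i <= last; i++)
	printf ("fix r%d\n", i);
      p = comma ? comma + 1 : NULL;
    }
  return 0;
}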
@@ -8339,8 +8292,6 @@ sh_fix_range (const char *const_str)
static void
sh_insert_attributes (tree node, tree *attributes)
{
- tree attrs;
-
if (TREE_CODE (node) != FUNCTION_DECL)
return;
@@ -8350,7 +8301,7 @@ sh_insert_attributes (tree node, tree *attributes)
/* Append the attributes to the deferred attributes. */
*sh_deferred_function_attributes_tail = *attributes;
- attrs = sh_deferred_function_attributes;
+ tree attrs = sh_deferred_function_attributes;
if (!attrs)
return;
@@ -8545,28 +8496,17 @@ sh2a_is_function_vector_call (rtx x)
int
sh2a_get_function_vector_number (rtx x)
{
- int num;
- tree list, t;
-
if ((GET_CODE (x) == SYMBOL_REF)
&& (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
{
- t = SYMBOL_REF_DECL (x);
+ tree t = SYMBOL_REF_DECL (x);
if (TREE_CODE (t) != FUNCTION_DECL)
return 0;
- list = SH_ATTRIBUTES (t);
- while (list)
- {
- if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
- {
- num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
- return num;
- }
-
- list = TREE_CHAIN (list);
- }
+ for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
+ if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+ return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
return 0;
}
@@ -8644,8 +8584,7 @@ sh_attr_renesas_p (const_tree td)
td = TREE_TYPE (td);
if (td == error_mark_node)
return false;
- return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
- != NULL_TREE);
+ return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
}
/* True if __attribute__((renesas)) or -mrenesas, for the current
@@ -8671,18 +8610,13 @@ sh_cfun_interrupt_handler_p (void)
bool
sh2a_function_vector_p (tree func)
{
- tree list;
if (TREE_CODE (func) != FUNCTION_DECL)
return false;
- list = SH_ATTRIBUTES (func);
- while (list)
- {
- if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
- return true;
+ for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
+ if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+ return true;
- list = TREE_CHAIN (list);
- }
return false;
}
@@ -8742,12 +8676,10 @@ system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
bool
fp_zero_operand (rtx op)
{
- const REAL_VALUE_TYPE *r;
-
if (GET_MODE (op) != SFmode)
return false;
- r = CONST_DOUBLE_REAL_VALUE (op);
+ const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
}
@@ -8775,13 +8707,11 @@ static int
branch_dest (rtx branch)
{
rtx dest = SET_SRC (PATTERN (branch));
- int dest_uid;
if (GET_CODE (dest) == IF_THEN_ELSE)
dest = XEXP (dest, 1);
- dest = XEXP (dest, 0);
- dest_uid = INSN_UID (dest);
- return INSN_ADDRESSES (dest_uid);
+
+ return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
}
/* Return nonzero if REG is not used after INSN.
@@ -8790,24 +8720,20 @@ branch_dest (rtx branch)
bool
reg_unused_after (rtx reg, rtx_insn *insn)
{
- enum rtx_code code;
- rtx set;
-
/* If the reg is set by this instruction, then it is safe for our
case. Disregard the case where this is a store to memory, since
we are checking a register used in the store address. */
- set = single_set (insn);
+ rtx set = single_set (insn);
if (set && !MEM_P (SET_DEST (set))
&& reg_overlap_mentioned_p (reg, SET_DEST (set)))
return true;
while ((insn = NEXT_INSN (insn)))
{
- rtx set;
if (!INSN_P (insn))
continue;
- code = GET_CODE (insn);
+ rtx_code code = GET_CODE (insn);
#if 0
/* If this is a label that existed before reload, then the register
@@ -8829,10 +8755,9 @@ reg_unused_after (rtx reg, rtx_insn *insn)
else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
{
rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
- int i;
- int retval = 0;
+ bool retval = false;
- for (i = 0; i < seq->len (); i++)
+ for (int i = 0; i < seq->len (); i++)
{
rtx_insn *this_insn = seq->insn (i);
rtx set = single_set (this_insn);
@@ -8859,18 +8784,18 @@ reg_unused_after (rtx reg, rtx_insn *insn)
&& reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
return false;
}
- if (retval == 1)
+ if (retval)
return true;
else if (code == JUMP_INSN)
return false;
}
- set = single_set (insn);
+ rtx set = single_set (insn);
if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
return false;
if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
return !MEM_P (SET_DEST (set));
- if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+ if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
return false;
if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
@@ -8894,13 +8819,9 @@ static GTY(()) tree fpscr_values;
static void
emit_fpu_switch (rtx scratch, int index)
{
- rtx src;
-
if (fpscr_values == NULL)
{
- tree t;
-
- t = build_index_type (integer_one_node);
+ tree t = build_index_type (integer_one_node);
t = build_array_type (integer_type_node, t);
t = build_decl (BUILTINS_LOCATION,
VAR_DECL, get_identifier ("__fpscr_values"), t);
@@ -8914,7 +8835,7 @@ emit_fpu_switch (rtx scratch, int index)
fpscr_values = t;
}
- src = DECL_RTL (fpscr_values);
+ rtx src = DECL_RTL (fpscr_values);
if (!can_create_pseudo_p ())
{
emit_move_insn (scratch, XEXP (src, 0));
@@ -8962,9 +8883,8 @@ fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
{
enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
- rtx addr_reg;
- addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
+ rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
emit_fpu_switch (addr_reg, fp_mode == norm_mode);
}
@@ -8976,13 +8896,11 @@ fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
static bool
sequence_insn_p (rtx_insn *insn)
{
- rtx_insn *prev, *next;
-
- prev = PREV_INSN (insn);
+ rtx_insn* prev = PREV_INSN (insn);
if (prev == NULL)
return false;
- next = NEXT_INSN (prev);
+ rtx_insn* next = NEXT_INSN (prev);
if (next == NULL)
return false;
@@ -9146,9 +9064,6 @@ sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
bool
nonpic_symbol_mentioned_p (rtx x)
{
- const char *fmt;
- int i;
-
if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
|| GET_CODE (x) == PC)
return true;
@@ -9174,13 +9089,12 @@ nonpic_symbol_mentioned_p (rtx x)
|| XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
return false;
- fmt = GET_RTX_FORMAT (GET_CODE (x));
- for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
{
if (fmt[i] == 'E')
{
- int j;
- for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
return true;
}
@@ -9194,8 +9108,7 @@ nonpic_symbol_mentioned_p (rtx x)
/* Convert a non-PIC address in `orig' to a PIC address using @GOT or
@GOTOFF in `reg'. */
rtx
-legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
- rtx reg)
+legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
{
if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
return orig;
@@ -9411,16 +9324,14 @@ sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
static rtx
sh_delegitimize_address (rtx orig_x)
{
- rtx x, y;
-
orig_x = delegitimize_mem_from_attrs (orig_x);
- x = orig_x;
+ rtx x = orig_x;
if (MEM_P (x))
x = XEXP (x, 0);
if (GET_CODE (x) == CONST)
{
- y = XEXP (x, 0);
+ rtx y = XEXP (x, 0);
if (GET_CODE (y) == UNSPEC)
{
if (XINT (y, 1) == UNSPEC_GOT
@@ -9450,9 +9361,6 @@ sh_delegitimize_address (rtx orig_x)
static rtx
mark_constant_pool_use (rtx x)
{
- rtx_insn *insn, *lab;
- rtx pattern;
-
if (x == NULL_RTX)
return x;
@@ -9468,8 +9376,8 @@ mark_constant_pool_use (rtx x)
/* Get the first label in the list of labels for the same constant
and delete another labels in the list. */
- lab = as_a <rtx_insn *> (x);
- for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
+ rtx_insn* lab = as_a <rtx_insn*> (x);
+ for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
{
if (!LABEL_P (insn)
|| LABEL_REFS (insn) != NEXT_INSN (insn))
@@ -9481,12 +9389,13 @@ mark_constant_pool_use (rtx x)
as_a<rtx_insn *> (insn)->set_deleted ();
/* Mark constants in a window. */
- for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
+ for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
+ insn = NEXT_INSN (insn))
{
if (!NONJUMP_INSN_P (insn))
continue;
- pattern = PATTERN (insn);
+ rtx pattern = PATTERN (insn);
if (GET_CODE (pattern) != UNSPEC_VOLATILE)
continue;
@@ -9581,14 +9490,11 @@ sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
if (REG_NOTE_KIND (link) == 0)
{
- enum attr_type type;
- rtx dep_set;
-
if (recog_memoized (insn) < 0
|| recog_memoized (dep_insn) < 0)
return cost;
- dep_set = single_set (dep_insn);
+ rtx dep_set = single_set (dep_insn);
/* The latency that we specify in the scheduling description refers
to the actual output, not to an auto-increment register; for that,
@@ -9634,8 +9540,8 @@ sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
}
if (TARGET_HARD_SH4 && !TARGET_SH4_300)
{
- enum attr_type dep_type = get_attr_type (dep_insn);
-
+ attr_type dep_type = get_attr_type (dep_insn);
+ attr_type type;
if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
cost--;
else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
@@ -9675,6 +9581,7 @@ sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
else if (TARGET_SH4_300)
{
/* Stores need their input register two cycles later. */
+ attr_type type;
if (dep_set && cost >= 1
&& ((type = get_attr_type (insn)) == TYPE_STORE
|| type == TYPE_PSTORE
@@ -9794,12 +9701,9 @@ find_set_regmode_weight (rtx x, machine_mode mode)
static short
find_insn_regmode_weight (rtx insn, machine_mode mode)
{
- short reg_weight = 0;
- rtx x;
-
/* Increment weight for each register born here. */
- x = PATTERN (insn);
- reg_weight += find_set_regmode_weight (x, mode);
+ rtx x = PATTERN (insn);
+ short reg_weight = find_set_regmode_weight (x, mode);
if (GET_CODE (x) == PARALLEL)
{
int j;
@@ -9894,27 +9798,24 @@ ready_reorder (rtx_insn **ready, int nready)
static int
find_r0_life_regions (basic_block b)
{
- rtx_insn *end, *insn;
- rtx pset;
- rtx r0_reg;
- int live;
+ bool live;
int set;
int death = 0;
if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
{
set = 1;
- live = 1;
+ live = true;
}
else
{
set = 0;
- live = 0;
+ live = false;
}
- insn = BB_HEAD (b);
- end = BB_END (b);
- r0_reg = gen_rtx_REG (SImode, R0_REG);
+ rtx_insn* insn = BB_HEAD (b);
+ rtx_insn* end = BB_END (b);
+ rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
while (1)
{
if (INSN_P (insn))
@@ -9922,15 +9823,17 @@ find_r0_life_regions (basic_block b)
if (find_regno_note (insn, REG_DEAD, R0_REG))
{
death++;
- live = 0;
+ live = false;
}
+
+ rtx pset;
if (!live
&& (pset = single_set (insn))
&& reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
&& !find_regno_note (insn, REG_UNUSED, R0_REG))
{
set++;
- live = 1;
+ live = true;
}
}
if (insn == end)
@@ -10223,10 +10126,6 @@ sh_trampoline_adjust_address (rtx tramp)
return tramp;
}
-/* FIXME: This is overly conservative. A SHcompact function that
- receives arguments ``by reference'' will have them stored in its
- own stack frame, so it must not pass pointers or references to
- these arguments to other functions by means of sibling calls. */
/* If PIC, we cannot make sibling calls to global functions
because the PLT requires r12 to be live. */
static bool
@@ -10566,15 +10465,14 @@ sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
for (int i = 1; i <= 3; i++, nop++)
{
- tree arg;
- machine_mode opmode, argmode;
- tree optype;
-
if (! signature_args[signature][i])
break;
- arg = CALL_EXPR_ARG (exp, i - 1);
+ tree arg = CALL_EXPR_ARG (exp, i - 1);
if (arg == error_mark_node)
return const0_rtx;
+
+ machine_mode opmode;
+ tree optype;
if (signature_args[signature][i] & 8)
{
opmode = ptr_mode;
@@ -10585,7 +10483,8 @@ sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
opmode = insn_data[icode].operand[nop].mode;
optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
}
- argmode = TYPE_MODE (TREE_TYPE (arg));
+
+ machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
if (argmode != opmode)
arg = build1 (NOP_EXPR, optype, arg);
op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
@@ -11095,12 +10994,12 @@ function_symbol (rtx target, const char *name, sh_function_kind kind)
return function_symbol_result (sym, lab);
}
-/* Find the number of a general purpose register in S. */
+/* Find the number of the first general purpose register in S that
+   is set, or -1 if there is none.  */
static int
scavenge_reg (HARD_REG_SET *s)
{
- int r;
- for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
+ for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
if (TEST_HARD_REG_BIT (*s, r))
return r;
return -1;
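
A minimal usage sketch of scavenge_reg, using GCC's standard HARD_REG_SET macros from hard-reg-set.h; the candidate set and register number are made up for illustration:

    HARD_REG_SET candidates;
    CLEAR_HARD_REG_SET (candidates);      /* start with an empty set */
    SET_HARD_REG_BIT (candidates, 3);     /* mark r3 as available */
    int r = scavenge_reg (&candidates);   /* yields 3; -1 if the set is empty */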
@@ -11133,14 +11032,13 @@ sh_expand_t_scc (rtx operands[])
rtx op0 = operands[2];
rtx op1 = operands[3];
rtx result = target;
- HOST_WIDE_INT val;
if (!REG_P (op0) || REGNO (op0) != T_REG
|| !CONST_INT_P (op1))
return false;
if (!REG_P (result))
result = gen_reg_rtx (SImode);
- val = INTVAL (op1);
+ HOST_WIDE_INT val = INTVAL (op1);
if ((code == EQ && val == 1) || (code == NE && val == 0))
emit_insn (gen_movt (result, get_t_reg_rtx ()));
else if ((code == EQ && val == 0) || (code == NE && val == 1))
@@ -11158,14 +11056,11 @@ sh_expand_t_scc (rtx operands[])
static rtx
extract_sfunc_addr (rtx insn)
{
- rtx pattern, part = NULL_RTX;
- int len, i;
-
- pattern = PATTERN (insn);
- len = XVECLEN (pattern, 0);
- for (i = 0; i < len; i++)
+ rtx pattern = PATTERN (insn);
+ const int len = XVECLEN (pattern, 0);
+ for (int i = 0; i < len; i++)
{
- part = XVECEXP (pattern, 0, i);
+ rtx part = XVECEXP (pattern, 0, i);
if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
&& GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
return XEXP (part, 0);
@@ -11250,13 +11145,10 @@ sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
{
pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
pcum->free_single_fp_reg = 0;
- pcum->stack_regs = 0;
- pcum->byref_regs = 0;
- pcum->byref = 0;
- pcum->outgoing = (n_named_args == -1) ? 0 : 1;
+ pcum->outgoing = n_named_args != -1;
- /* XXX - Should we check TARGET_HITACHI here ??? */
- pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
+ /* FIXME: Should we check TARGET_HITACHI here ??? */
+ pcum->renesas_abi = sh_attr_renesas_p (fntype);
if (fntype)
{
@@ -11268,7 +11160,7 @@ sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
else
{
pcum->arg_count [(int) SH_ARG_INT] = 0;
- pcum->prototype_p = FALSE;
+ pcum->prototype_p = false;
if (mode != VOIDmode)
{
/* If the default ABI is the Renesas ABI then all library
@@ -11287,7 +11179,7 @@ sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
&& TARGET_FPU_DOUBLE)));
}
else
- pcum->force_mem = FALSE;
+ pcum->force_mem = false;
}
}
@@ -11543,8 +11435,7 @@ sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
static void
sh_conditional_register_usage (void)
{
- int regno;
- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
+ for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
if (! VALID_REGISTER_P (regno))
fixed_regs[regno] = call_used_regs[regno] = 1;
/* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
@@ -11566,7 +11457,7 @@ sh_conditional_register_usage (void)
call_really_used_regs[MACL_REG] = 0;
}
- for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
+ for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
if (! fixed_regs[regno] && call_really_used_regs[regno])
SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
@@ -11614,9 +11505,6 @@ sh_init_sync_libfuncs (void)
bool
sh_can_use_simple_return_p (void)
{
- HARD_REG_SET live_regs_mask;
- int d;
-
if (! reload_completed || frame_pointer_needed)
return false;
@@ -11625,7 +11513,8 @@ sh_can_use_simple_return_p (void)
return false;
/* Finally, allow for pr save. */
- d = calc_live_regs (&live_regs_mask);
+ HARD_REG_SET live_regs_mask;
+ int d = calc_live_regs (&live_regs_mask);
if (rounded_frame_size (d) > 4)
return false;
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index 60c625028c1..a1a789fba66 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -201,7 +201,7 @@ extern int code_for_indirect_jump_scratch;
SUBTARGET_EXTRA_SPECS
#if TARGET_CPU_DEFAULT & MASK_HARD_SH4
-#define SUBTARGET_ASM_RELAX_SPEC "%{!m1:%{!m2:%{!m3*::-isa=sh4-up}}}"
+#define SUBTARGET_ASM_RELAX_SPEC "%{!m1:%{!m2:%{!m3*:-isa=sh4-up}}}"
#else
#define SUBTARGET_ASM_RELAX_SPEC "%{m4*:-isa=sh4-up}"
#endif
@@ -245,7 +245,7 @@ extern int code_for_indirect_jump_scratch;
/* Strict nofpu means that the compiler should tell the assembler
to reject FPU instructions. E.g. from ASM inserts. */
#if TARGET_CPU_DEFAULT & MASK_HARD_SH4 && !(TARGET_CPU_DEFAULT & MASK_SH_E)
-#define SUBTARGET_ASM_ISA_SPEC "%{!m1:%{!m2:%{!m3*:%{m4-nofpu|!m4*::-isa=sh4-nofpu}}}}"
+#define SUBTARGET_ASM_ISA_SPEC "%{!m1:%{!m2:%{!m3*:%{m4-nofpu|!m4*:-isa=sh4-nofpu}}}}"
#else
#define SUBTARGET_ASM_ISA_SPEC \
@@ -1154,6 +1154,8 @@ extern enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
&& (unsigned) (REGNO) < (unsigned) (FIRST_FP_PARM_REG \
+ NPARM_REGS (SFmode))))
+#ifdef __cplusplus
+
/* Define a data type for recording info about an argument list
during the scan of that argument list. This data type should
hold all necessary information about the function itself
@@ -1164,48 +1166,37 @@ extern enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
of arguments scanned so far (including the invisible argument,
if any, which holds the structure-value-address).
Thus NARGREGS or more means all following args should go on the stack. */
+
enum sh_arg_class { SH_ARG_INT = 0, SH_ARG_FLOAT = 1 };
-struct sh_args {
- int arg_count[2];
- int force_mem;
+
+struct sh_args
+{
+ /* How many SH_ARG_INT and how many SH_ARG_FLOAT args there are. */
+ int arg_count[2];
+
+ bool force_mem;
+
/* Nonzero if a prototype is available for the function. */
- int prototype_p;
+ bool prototype_p;
+
   /* The number of an odd floating-point register that should be used
for the next argument of type float. */
- int free_single_fp_reg;
+ int free_single_fp_reg;
+
/* Whether we're processing an outgoing function call. */
- int outgoing;
- /* The number of general-purpose registers that should have been
- used to pass partial arguments, that are passed totally on the
- stack. On SHcompact, a call trampoline will pop them off the
- stack before calling the actual function, and, if the called
- function is implemented in SHcompact mode, the incoming arguments
- decoder will push such arguments back onto the stack. For
- incoming arguments, STACK_REGS also takes into account other
- arguments passed by reference, that the decoder will also push
- onto the stack. */
- int stack_regs;
- /* The number of general-purpose registers that should have been
- used to pass arguments, if the arguments didn't have to be passed
- by reference. */
- int byref_regs;
- /* Set as by shcompact_byref if the current argument is to be passed
- by reference. */
- int byref;
+ bool outgoing;
/* This is set to nonzero when the call in question must use the Renesas ABI,
even without the -mrenesas option. */
- int renesas_abi;
+ bool renesas_abi;
};
-#define CUMULATIVE_ARGS struct sh_args
+typedef sh_args CUMULATIVE_ARGS;
+
+/* Set when processing a function with interrupt attribute. */
+extern bool current_function_interrupt;
-#define GET_SH_ARG_CLASS(MODE) \
- ((TARGET_FPU_ANY && (MODE) == SFmode) \
- ? SH_ARG_FLOAT \
- : TARGET_FPU_DOUBLE && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
- || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
- ? SH_ARG_FLOAT : SH_ARG_INT)
+#endif // __cplusplus
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
@@ -1307,12 +1298,10 @@ struct sh_args {
#define HAVE_POST_INCREMENT TARGET_SH1
#define HAVE_PRE_DECREMENT TARGET_SH1
-#define USE_LOAD_POST_INCREMENT(mode) ((mode == SImode || mode == DImode) \
- ? 0 : TARGET_SH1)
-#define USE_LOAD_PRE_DECREMENT(mode) 0
-#define USE_STORE_POST_INCREMENT(mode) 0
-#define USE_STORE_PRE_DECREMENT(mode) ((mode == SImode || mode == DImode) \
- ? 0 : TARGET_SH1)
+#define USE_LOAD_POST_INCREMENT(mode) TARGET_SH1
+#define USE_LOAD_PRE_DECREMENT(mode) TARGET_SH2A
+#define USE_STORE_POST_INCREMENT(mode) TARGET_SH2A
+#define USE_STORE_PRE_DECREMENT(mode) TARGET_SH1
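
A hedged illustration of what enabling these hooks means: with USE_LOAD_POST_INCREMENT true, a simple loop like the following becomes a candidate for the SH post-increment load mov.l @Rn+,Rm (the function and its names are illustrative only, not part of the patch):

    unsigned int
    sum_words (const unsigned int *p, int n)
    {
      unsigned int s = 0;
      while (n-- > 0)
        s += *p++;   /* *p++ maps naturally onto a mov.l @Rn+,Rm load */
      return s;
    }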
/* If a memory clear move would take CLEAR_RATIO or more simple
move-instruction pairs, we will do a setmem instead. */
@@ -1807,10 +1796,6 @@ struct sh_args {
#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
final_prescan_insn ((INSN), (OPVEC), (NOPERANDS))
-
-extern rtx sh_compare_op0;
-extern rtx sh_compare_op1;
-
/* Which processor to schedule for. The elements of the enumeration must
match exactly the cpu attribute in the sh.md file. */
enum processor_type {
@@ -1849,8 +1834,6 @@ extern enum mdep_reorg_phase_e mdep_reorg_phase;
extern tree sh_deferred_function_attributes;
extern tree *sh_deferred_function_attributes_tail;
-/* Set when processing a function with interrupt attribute. */
-extern int current_function_interrupt;
/* Instructions with unfilled delay slots take up an
@@ -1891,8 +1874,7 @@ extern int current_function_interrupt;
? (TARGET_FMOVD ? FP_MODE_DOUBLE : FP_MODE_NONE) \
: ACTUAL_NORMAL_MODE (ENTITY))
-#define EPILOGUE_USES(REGNO) ((TARGET_SH2E || TARGET_SH4) \
- && (REGNO) == FPSCR_REG)
+#define EPILOGUE_USES(REGNO) (TARGET_FPU_ANY && (REGNO) == FPSCR_REG)
#define DWARF_FRAME_RETURN_COLUMN (DWARF_FRAME_REGNUM (PR_REG))
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 2d9502b7aa7..406721dc736 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -477,16 +477,6 @@
(define_attr "is_sfunc" ""
(if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0)))
-(define_attr "branch_zero" "yes,no"
- (cond [(eq_attr "type" "!cbranch") (const_string "no")
- (ne (symbol_ref "(next_active_insn (insn)\
- == (prev_active_insn\
- (XEXP (SET_SRC (PATTERN (insn)), 1))))\
- && get_attr_length (next_active_insn (insn)) == 2")
- (const_int 0))
- (const_string "yes")]
- (const_string "no")))
-
;; SH4 Double-precision computation with double-precision result -
;; the two halves are ready at different times.
(define_attr "dfp_comp" "yes,no"
@@ -539,8 +529,13 @@
(eq_attr "type" "!pstore,prget")) (nil) (nil)])
;; Conditional branches with delay slots are available starting with SH2.
+;; If zero displacement conditional branches are fast, disable the delay
+;; slot if the branch jumps over only one 2-byte insn.
(define_delay
- (and (eq_attr "type" "cbranch") (match_test "TARGET_SH2"))
+ (and (eq_attr "type" "cbranch")
+ (match_test "TARGET_SH2")
+ (not (and (match_test "TARGET_ZDCBRANCH")
+ (match_test "sh_cbranch_distance (insn, 4) == 2"))))
[(eq_attr "cond_delay_slot" "yes") (nil) (nil)])
;; -------------------------------------------------------------------------
@@ -909,22 +904,6 @@
FAIL;
})
-;; FIXME: For some reason, on SH4A and SH2A combine fails to simplify this
-;; pattern by itself. What this actually does is:
-;; x == 0: (1 >> 0-0) & 1 = 1
-;; x != 0: (1 >> 0-x) & 1 = 0
-;; Without this the test pr51244-8.c fails on SH2A and SH4A.
-(define_insn_and_split "*cmpeqsi_t"
- [(set (reg:SI T_REG)
- (and:SI (lshiftrt:SI
- (const_int 1)
- (neg:SI (match_operand:SI 0 "arith_reg_operand" "r")))
- (const_int 1)))]
- "TARGET_SH1"
- "#"
- "&& 1"
- [(set (reg:SI T_REG) (eq:SI (match_dup 0) (const_int 0)))])
-
(define_insn "cmpgtsi_t"
[(set (reg:SI T_REG)
(gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
@@ -1103,6 +1082,97 @@
(lshiftrt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 31)))
(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))])
+;; In some cases, it might be shorter to get a tested bit into bit 31 and
+;; use div0s. Otherwise it's usually better to just leave the xor and tst
+;; sequence. The only thing we can try to do here is avoiding the large
+;; tst constant.
+(define_insn_and_split "*cmp_div0s_7"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI (xor:SI (match_operand:SI 0 "arith_reg_operand")
+ (match_operand:SI 1 "arith_reg_operand"))
+ (const_int 1)
+ (match_operand 2 "const_int_operand")))]
+ "TARGET_SH1 && can_create_pseudo_p ()
+ && (INTVAL (operands[2]) == 7 || INTVAL (operands[2]) == 15
+ || INTVAL (operands[2]) == 23 || INTVAL (operands[2]) == 29
+ || INTVAL (operands[2]) == 30 || INTVAL (operands[2]) == 31)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ const int bitpos = INTVAL (operands[2]);
+
+ rtx op0 = gen_reg_rtx (SImode);
+ rtx op1 = gen_reg_rtx (SImode);
+
+ if (bitpos == 23 || bitpos == 30 || bitpos == 29)
+ {
+ emit_insn (gen_ashlsi3 (op0, operands[0], GEN_INT (31 - bitpos)));
+ emit_insn (gen_ashlsi3 (op1, operands[1], GEN_INT (31 - bitpos)));
+ }
+ else if (bitpos == 15)
+ {
+ emit_insn (gen_extendhisi2 (op0, gen_lowpart (HImode, operands[0])));
+ emit_insn (gen_extendhisi2 (op1, gen_lowpart (HImode, operands[1])));
+ }
+ else if (bitpos == 7)
+ {
+ emit_insn (gen_extendqisi2 (op0, gen_lowpart (QImode, operands[0])));
+ emit_insn (gen_extendqisi2 (op1, gen_lowpart (QImode, operands[1])));
+ }
+ else if (bitpos == 31)
+ {
+ op0 = operands[0];
+ op1 = operands[1];
+ }
+ else
+ gcc_unreachable ();
+
+ emit_insn (gen_cmp_div0s (op0, op1));
+ DONE;
+})
+
+;; For bits 0..7 using an xor and tst #imm,r0 sequence seems to be better.
+;; Thus allow the following patterns only for higher bit positions, where
+;; it's more likely that the large tst constant can be avoided.
+(define_insn_and_split "*cmp_div0s_8"
+ [(set (reg:SI T_REG)
+ (eq:SI (zero_extract:SI (match_operand:SI 0 "arith_reg_operand")
+ (const_int 1)
+ (match_operand 2 "const_int_operand"))
+ (zero_extract:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 1)
+ (match_dup 2))))]
+ "TARGET_SH1 && can_create_pseudo_p ()
+ && (INTVAL (operands[2]) == 15
+ || INTVAL (operands[2]) == 23 || INTVAL (operands[2]) == 29
+ || INTVAL (operands[2]) == 30 || INTVAL (operands[2]) == 31)"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1))
+ (const_int 1) (match_dup 2)))
+ (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))])
+
+(define_insn_and_split "*cmp_div0s_9"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI (xor:SI (xor:SI (match_operand:SI 0 "arith_reg_operand")
+ (match_operand:SI 1 "arith_reg_operand"))
+ (match_operand 2 "const_int_operand"))
+ (const_int 1)
+ (match_operand 3 "const_int_operand")))]
+ "TARGET_SH1 && can_create_pseudo_p ()
+ && (INTVAL (operands[2]) & 0xFFFFFFFF) == (1U << INTVAL (operands[3]))
+ && (INTVAL (operands[3]) == 15
+ || INTVAL (operands[3]) == 23 || INTVAL (operands[3]) == 29
+ || INTVAL (operands[3]) == 30 || INTVAL (operands[3]) == 31)"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1))
+ (const_int 1) (match_dup 3)))
+ (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))])
+
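The equivalence behind these splits can be checked with a small host-side sketch (plain C, names illustrative): testing bit N of x ^ y is the same as comparing the sign bits after shifting both operands so that bit N lands in bit 31, which is exactly the T bit that div0s computes.

    #include <assert.h>
    #include <stdint.h>

    static int bit_of_xor (uint32_t x, uint32_t y, int bitpos)
    {
      return ((x ^ y) >> bitpos) & 1;      /* the zero_extract form */
    }

    static int div0s_form (uint32_t x, uint32_t y, int bitpos)
    {
      uint32_t a = x << (31 - bitpos);     /* ashlsi3 */
      uint32_t b = y << (31 - bitpos);     /* ashlsi3 */
      return (a >> 31) ^ (b >> 31);        /* div0s: T = sign(a) ^ sign(b) */
    }

    int main (void)
    {
      assert (bit_of_xor (0x12345678, 0x9abcdef0, 23)
              == div0s_form (0x12345678, 0x9abcdef0, 23));
      return 0;
    }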
;; -------------------------------------------------------------------------
;; SImode compare and branch
;; -------------------------------------------------------------------------
@@ -1138,29 +1208,6 @@
(label_ref (match_dup 2))
(pc)))])
-;; FIXME: Similar to the *cmpeqsi_t pattern above, for some reason, on SH4A
-;; and SH2A combine fails to simplify this pattern by itself.
-;; What this actually does is:
-;; x == 0: (1 >> 0-0) & 1 = 1
-;; x != 0: (1 >> 0-x) & 1 = 0
-;; Without this the test pr51244-8.c fails on SH2A and SH4A.
-(define_split
- [(set (pc)
- (if_then_else
- (eq (and:SI (lshiftrt:SI
- (const_int 1)
- (neg:SI (match_operand:SI 0 "arith_reg_operand" "")))
- (const_int 1))
- (const_int 0))
- (label_ref (match_operand 2))
- (pc)))
- (clobber (reg:SI T_REG))]
- "TARGET_SH1"
- [(set (reg:SI T_REG) (eq:SI (match_dup 0) (const_int 0)))
- (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
- (label_ref (match_dup 2))
- (pc)))])
-
;; FIXME: These don't seem to have any effect on the generated cbranch code
;; anymore, but only on some register allocation choices.
(define_split
@@ -3359,6 +3406,22 @@
DONE;
})
+(define_insn_and_split "*rotcr"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 1))
+ (const_int -2147483648))) ;; 0xffffffff80000000
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ emit_insn (gen_sett ());
+ emit_insn (gen_rotcr (operands[0], operands[1], get_t_reg_rtx ()));
+ DONE;
+})
+
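A quick C model of why this split is valid (illustrative only): sett forces T to 1, and rotcr shifts the operand right by one bit while rotating T into bit 31, which yields exactly (x >> 1) | 0x80000000.

    static unsigned int
    rotcr_with_t_set (unsigned int x)
    {
      unsigned int t = 1;             /* sett */
      return (x >> 1) | (t << 31);    /* rotcr: T enters bit 31 */
    }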
;; rotcr combine patterns for rotating in the negated T_REG value.
(define_insn_and_split "*rotcr_neg_t"
[(set (match_operand:SI 0 "arith_reg_dest")
@@ -4820,6 +4883,15 @@
[(set_attr "type" "load")
(set_attr "length" "2,2,4")])
+;; The pre-dec and post-inc mems must be captured by the '<' and '>'
+;; constraints; otherwise wrong code might get generated.
+(define_insn "*extend<mode>si2_predec"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+ (sign_extend:SI (match_operand:QIHI 1 "pre_dec_mem" "<")))]
+ "TARGET_SH2A"
+ "mov.<bw> %1,%0"
+ [(set_attr "type" "load")])
+
;; The *_snd patterns will take care of other QImode/HImode addressing
;; modes than displacement addressing. They must be defined _after_ the
;; displacement addressing patterns. Otherwise the displacement addressing
@@ -5065,20 +5137,23 @@
;; t/r must come after r/r, lest reload will try to reload stuff like
;; (set (subreg:SI (mem:QI (plus:SI (reg:SI SP_REG) (const_int 12)) 0) 0)
;; (made from (set (subreg:SI (reg:QI ###) 0) ) into T.
+;; Notice that although this pattern allows movi20 and movi20s on non-SH2A,
+;; those alternatives will not be taken, as they will be converted into
+;; PC-relative loads.
(define_insn "movsi_i"
[(set (match_operand:SI 0 "general_movdst_operand"
- "=r,r,r,r,r,r,m,<,<,x,l,x,l,r")
+ "=r,r, r, r, r, r,r,r,m,<,<,x,l,x,l,r")
(match_operand:SI 1 "general_movsrc_operand"
- "Q,r,I08,mr,x,l,r,x,l,r,r,>,>,i"))]
- "TARGET_SH1
- && ! TARGET_SH2E
- && ! TARGET_SH2A
+ " Q,r,I08,I20,I28,mr,x,l,r,x,l,r,r,>,>,i"))]
+ "TARGET_SH1 && !TARGET_FPU_ANY
&& (register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
"@
mov.l %1,%0
mov %1,%0
mov %1,%0
+ movi20 %1,%0
+ movi20s %1,%0
mov.l %1,%0
sts %1,%0
sts %1,%0
@@ -5090,9 +5165,27 @@
lds.l %1,%0
lds.l %1,%0
fake %1,%0"
- [(set_attr "type" "pcload_si,move,movi8,load_si,mac_gp,prget,store,mac_mem,
- pstore,gp_mac,prset,mem_mac,pload,pcload_si")
- (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
+ [(set_attr "type" "pcload_si,move,movi8,move,move,load_si,mac_gp,prget,store,
+ mac_mem,pstore,gp_mac,prset,mem_mac,pload,pcload_si")
+ (set_attr_alternative "length"
+ [(const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 4)
+ (const_int 4)
+ (if_then_else (match_operand 1 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)
+ (if_then_else (match_operand 0 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)])])
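
For reference, a hedged sketch of the constant ranges behind the new I20 and I28 alternatives, assuming the usual SH2A encodings (movi20 takes a sign-extended 20-bit immediate; movi20s takes the same value shifted left by 8 bits):

    static int
    fits_movi20 (int v)                  /* I20 constraint */
    {
      return v >= -(1 << 19) && v < (1 << 19);
    }

    static int
    fits_movi20s (int v)                 /* I28 constraint */
    {
      /* Arithmetic right shift assumed, as in GCC's own
         CONST_OK_FOR_* macros.  */
      return (v & 0xff) == 0 && fits_movi20 (v >> 8);
    }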
;; t/r must come after r/r, lest reload will try to reload stuff like
;; (subreg:SI (reg:SF FR14_REG) 0) into T (compiling stdlib/strtod.c -m3e -O2)
@@ -5100,12 +5193,15 @@
;; will require a reload.
;; ??? We can't include f/f because we need the proper FPSCR setting when
;; TARGET_FMOVD is in effect, and mode switching is done before reload.
+;; Notice that although this pattern allows movi20 and movi20s on non-SH2A,
+;; those alternatives will not be taken, as they will be converted into
+;; PC-relative loads.
(define_insn "movsi_ie"
[(set (match_operand:SI 0 "general_movdst_operand"
- "=r,r,r,r,r,r,r,r,mr,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y")
+ "=r,r, r, r, r, r,r,r,mr,<,<,x,l,x,l,y,<,r,y,r,*f, y,*f,y")
(match_operand:SI 1 "general_movsrc_operand"
- "Q,r,I08,I20,I28,mr,x,l,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))]
- "(TARGET_SH2E || TARGET_SH2A)
+ " Q,r,I08,I20,I28,mr,x,l, r,x,l,r,r,>,>,>,y,i,r,y, y,*f,*f,y"))]
+ "TARGET_SH1 && TARGET_FPU_ANY
&& ((register_operand (operands[0], SImode)
&& !fpscr_operand (operands[0], SImode))
|| (register_operand (operands[1], SImode)
@@ -5145,14 +5241,12 @@
(const_int 2)
(const_int 4)
(const_int 4)
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
+ (if_then_else (match_operand 1 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
(const_int 2)
(const_int 2)
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
+ (if_then_else (match_operand 0 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
(const_int 2)
(const_int 2)
(const_int 2)
@@ -5169,23 +5263,44 @@
(const_int 2)
(const_int 0)])])
+;; Notice that although this pattern allows movi20 and movi20s on non-SH2A,
+;; those alternatives will not be taken, as they will be converted into
+;; PC-relative loads.
(define_insn "movsi_i_lowpart"
[(set (strict_low_part
- (match_operand:SI 0 "general_movdst_operand" "+r,r,r,r,r,r,m,r"))
- (match_operand:SI 1 "general_movsrc_operand" "Q,r,I08,mr,x,l,r,i"))]
- "TARGET_SH1
- && (register_operand (operands[0], SImode)
- || register_operand (operands[1], SImode))"
+ (match_operand:SI 0 "general_movdst_operand"
+ "+r,r, r, r, r, r,r,r,m,r"))
+ (match_operand:SI 1 "general_movsrc_operand"
+ " Q,r,I08,I20,I28,mr,x,l,r,i"))]
+ "TARGET_SH1
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
"@
mov.l %1,%0
mov %1,%0
mov %1,%0
+ movi20 %1,%0
+ movi20s %1,%0
mov.l %1,%0
sts %1,%0
sts %1,%0
mov.l %1,%0
fake %1,%0"
- [(set_attr "type" "pcload,move,arith,load,mac_gp,prget,store,pcload")])
+ [(set_attr "type" "pcload,move,movi8,move,move,load,mac_gp,prget,store,
+ pcload")
+ (set_attr_alternative "length"
+ [(const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 4)
+ (const_int 4)
+ (if_then_else (match_operand 1 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)
+ (if_then_else (match_operand 0 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (const_int 2)])])
(define_insn_and_split "load_ra"
[(set (match_operand:SI 0 "general_movdst_operand" "")
@@ -5242,7 +5357,7 @@
" synco" "\n"
" icbi @%0";
}
- [(set_attr "length" "16") ;; FIXME: Why 16 and not 6? Looks like typo.
+ [(set_attr "length" "6")
(set_attr "type" "cwb")])
(define_expand "mov<mode>"
@@ -5261,6 +5376,22 @@
prepare_move_operands (operands, <MODE>mode);
})
+;; The pre-dec and post-inc mems must be captured by the '<' and '>'
+;; constraints; otherwise wrong code might get generated.
+(define_insn "*mov<mode>_load_predec"
+ [(set (match_operand:QIHISI 0 "arith_reg_dest" "=z")
+ (match_operand:QIHISI 1 "pre_dec_mem" "<"))]
+ "TARGET_SH2A"
+ "mov.<bwl> %1,%0"
+ [(set_attr "type" "load")])
+
+(define_insn "*mov<mode>_store_postinc"
+ [(set (match_operand:QIHISI 0 "post_inc_mem" "=>")
+ (match_operand:QIHISI 1 "arith_reg_operand" "z"))]
+ "TARGET_SH2A"
+ "mov.<bwl> %1,%0"
+ [(set_attr "type" "store")])
+
;; Specifying the displacement addressing load / store patterns separately
;; before the generic movqi / movhi pattern allows controlling the order
;; in which load / store insns are selected in a more fine grained way.
@@ -5346,27 +5477,26 @@
lds %1,%0"
[(set_attr "type" "pcload,move,movi8,store,load,store,load,store,load,prget,prset")
(set (attr "length")
- (cond [(and (match_operand 0 "displacement_mem_operand")
- (not (match_operand 0 "short_displacement_mem_operand")))
- (const_int 4)
- (and (match_operand 1 "displacement_mem_operand")
- (not (match_operand 1 "short_displacement_mem_operand")))
- (const_int 4)]
+ (cond [(match_operand 0 "long_displacement_mem_operand") (const_int 4)
+ (match_operand 1 "long_displacement_mem_operand") (const_int 4)]
(const_int 2)))])
;; x/r can be created by inlining/cse, e.g. for execute/961213-1.c
;; compiled with -m2 -ml -O3 -funroll-loops
(define_insn "*movdi_i"
- [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m,r,r,r,*!x")
- (match_operand:DI 1 "general_movsrc_operand" "Q,r,m,r,I08,i,x,r"))]
+ [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m, r,r,r,*!x")
+ (match_operand:DI 1 "general_movsrc_operand" " Q,r,m,r,I08,i,x, r"))]
"TARGET_SH1
&& (arith_reg_operand (operands[0], DImode)
|| arith_reg_operand (operands[1], DImode))"
{
return output_movedouble (insn, operands, DImode);
}
- [(set_attr "length" "4")
- (set_attr "type" "pcload,move,load,store,move,pcload,move,move")])
+ [(set_attr "type" "pcload,move,load,store,move,pcload,move,move")
+ (set (attr "length")
+ (cond [(match_operand 0 "long_displacement_mem_operand") (const_int 8)
+ (match_operand 1 "long_displacement_mem_operand") (const_int 8)]
+ (const_int 4)))])
;; If the output is a register and the input is memory or a register, we have
;; to be careful and see which word needs to be loaded first.
@@ -5444,8 +5574,8 @@
;; FIXME: This should be a define_insn_and_split.
(define_insn "movdf_k"
- [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m")
- (match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))]
+ [(set (match_operand:DF 0 "general_movdst_operand" "=r, r,r,m")
+ (match_operand:DF 1 "general_movsrc_operand" " r,FQ,m,r"))]
"TARGET_SH1
&& (!TARGET_FPU_DOUBLE || reload_completed
/* ??? We provide some insn so that direct_{load,store}[DFmode] get set */
@@ -5456,8 +5586,11 @@
{
return output_movedouble (insn, operands, DFmode);
}
- [(set_attr "length" "4")
- (set_attr "type" "move,pcload,load,store")])
+ [(set_attr "type" "move,pcload,load,store")
+ (set (attr "length")
+ (cond [(match_operand 0 "long_displacement_mem_operand") (const_int 8)
+ (match_operand 1 "long_displacement_mem_operand") (const_int 8)]
+ (const_int 4)))])
;; All alternatives of movdf_i4 are split for ! TARGET_FMOVD.
;; However, the d/F/c/z alternative cannot be split directly; it is converted
@@ -5465,11 +5598,19 @@
;; the d/m/c/X alternative, which is split later into single-precision
;; instructions. And when not optimizing, no splits are done before fixing
;; up pcloads, so we need usable length information for that.
+;; A DF constant load results in the following worst-case 8-byte sequence:
+;; mova ...,r0
+;; fmov.s @r0+,..
+;; fmov.s @r0,...
+;; add #-4,r0
(define_insn "movdf_i4"
- [(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d")
- (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r"))
+ [(set (match_operand:DF 0 "general_movdst_operand"
+ "=d,r, d,d,m, r,r,m,!??r,!???d")
+ (match_operand:DF 1 "general_movsrc_operand"
+ " d,r, F,m,d,FQ,m,r, d, r"))
(use (reg:SI FPSCR_MODES_REG))
- (clobber (match_scratch:SI 2 "=X,X,&z,X,X,X,X,X,X,X"))]
+ (clobber (match_scratch:SI 2
+ "=X,X,&z,X,X, X,X,X, X, X"))]
"TARGET_FPU_DOUBLE
&& (arith_reg_operand (operands[0], DFmode)
|| arith_reg_operand (operands[1], DFmode))"
@@ -5493,16 +5634,28 @@
}
}
[(set_attr_alternative "length"
- [(if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8))
+ [(if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 4))
(const_int 4)
- (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
- (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
- (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8))
+ (if_then_else (match_operand 1 "displacement_mem_operand")
+ (if_then_else (eq_attr "fmovd" "yes")
+ (const_int 4) (const_int 8))
+ (if_then_else (eq_attr "fmovd" "yes")
+ (const_int 2) (const_int 4)))
+ (if_then_else (match_operand 0 "displacement_mem_operand")
+ (if_then_else (eq_attr "fmovd" "yes")
+ (const_int 4) (const_int 8))
+ (if_then_else (eq_attr "fmovd" "yes")
+ (const_int 2) (const_int 4)))
(const_int 4)
- (const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn)
+ (if_then_else (match_operand 1 "long_displacement_mem_operand")
+ (const_int 8) (const_int 4))
+ (if_then_else (match_operand 0 "long_displacement_mem_operand")
+ (const_int 8) (const_int 4))
(const_int 8)
(const_int 8)])
- (set_attr "type" "fmove,move,pcfload,fload,fstore,pcload,load,store,load,fload")
+ (set_attr "type" "fmove,move,pcfload,fload,fstore,pcload,load,store,load,
+ fload")
(set_attr "late_fp_use" "*,*,*,*,yes,*,*,*,*,*")
(set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes")
(const_string "double")
@@ -5896,8 +6049,11 @@
}
})
+;; FIXME: Although the movsf_i pattern is not used when there's an FPU,
+;; it somehow influences some RA choices also on FPU targets.
+;; For non-FPU targets it's actually not needed.
(define_insn "movsf_i"
- [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r")
+ [(set (match_operand:SF 0 "general_movdst_operand" "=r,r, r, r,m,l,r")
(match_operand:SF 1 "general_movsrc_operand" "r,G,FQ,mr,r,r,l"))]
"TARGET_SH1
&& (! TARGET_SH2E
@@ -5914,21 +6070,34 @@
mov.l %1,%0
lds %1,%0
sts %1,%0"
- [(set_attr "type" "move,move,pcload,load,store,move,move")])
+ [(set_attr "type" "move,move,pcload,load,store,move,move")
+ (set_attr_alternative "length"
+ [(const_int 2)
+ (const_int 2)
+ (if_then_else (match_operand 1 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (if_then_else (match_operand 1 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (if_then_else (match_operand 0 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)])])
;; We may not split the ry/yr/XX alternatives to movsi_ie, since
;; update_flow_info would not know where to put REG_EQUAL notes
;; when the destination changes mode.
(define_insn "movsf_ie"
[(set (match_operand:SF 0 "general_movdst_operand"
- "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,<,y,y")
+ "=f,r,f,f,fy, f,m, r, r,m,f,y,y,rf,r,y,<,y,y")
(match_operand:SF 1 "general_movsrc_operand"
- "f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y,>,y"))
+ " f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y,>,y"))
(use (reg:SI FPSCR_MODES_REG))
- (clobber (match_scratch:SI 2 "=X,X,X,X,&z,X,X,X,X,X,X,X,X,y,X,X,X,X,X"))]
+ (clobber (match_scratch:SI 2 "=X,X,X,X,&z, X,X, X, X,X,X,X,X, y,X,X,X,X,X"))]
"TARGET_SH2E
- && (arith_reg_operand (operands[0], SFmode) || fpul_operand (operands[0], SFmode)
- || arith_reg_operand (operands[1], SFmode) || fpul_operand (operands[1], SFmode)
+ && (arith_reg_operand (operands[0], SFmode)
+ || fpul_operand (operands[0], SFmode)
+ || arith_reg_operand (operands[1], SFmode)
+ || fpul_operand (operands[1], SFmode)
|| arith_reg_operand (operands[2], SImode))"
"@
fmov %1,%0
@@ -5959,19 +6128,15 @@
(const_int 2)
(const_int 2)
(const_int 4)
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
+ (if_then_else (match_operand 1 "displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (if_then_else (match_operand 0 "displacement_mem_operand")
+ (const_int 4) (const_int 2))
(const_int 2)
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
+ (if_then_else (match_operand 1 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (if_then_else (match_operand 0 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
(const_int 2)
(const_int 2)
(const_int 2)
@@ -6007,11 +6172,11 @@
(define_insn_and_split "movsf_ie_ra"
[(set (match_operand:SF 0 "general_movdst_operand"
- "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,<,y,y")
+ "=f,r,f,f,fy,f,m, r,r,m,f,y,y,rf,r,y,<,y,y")
(match_operand:SF 1 "general_movsrc_operand"
- "f,r,G,H,FQ,m,f,FQ,m,r,y,f,>,fr,y,r,y,>,y"))
+ " f,r,G,H,FQ,m,f,FQ,m,r,y,f,>,fr,y,r,y,>,y"))
(use (reg:SI FPSCR_MODES_REG))
- (clobber (match_scratch:SF 2 "=r,r,X,X,&z,r,r,X,r,r,r,r,r,y,r,r,r,r,r"))
+ (clobber (match_scratch:SF 2 "=r,r,X,X,&z,r,r, X,r,r,r,r,r, y,r,r,r,r,r"))
(const_int 0)]
"TARGET_SH2E
&& (arith_reg_operand (operands[0], SFmode)
@@ -6057,19 +6222,15 @@
(const_int 2)
(const_int 2)
(const_int 4)
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
+ (if_then_else (match_operand 1 "displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (if_then_else (match_operand 0 "displacement_mem_operand")
+ (const_int 4) (const_int 2))
(const_int 2)
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
- (if_then_else
- (match_test "TARGET_SH2A")
- (const_int 4) (const_int 2))
+ (if_then_else (match_operand 1 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
+ (if_then_else (match_operand 0 "long_displacement_mem_operand")
+ (const_int 4) (const_int 2))
(const_int 2)
(const_int 2)
(const_int 2)
diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt
index f9b02c520cb..2a94c9becb2 100644
--- a/gcc/config/sh/sh.opt
+++ b/gcc/config/sh/sh.opt
@@ -181,10 +181,6 @@ maccumulate-outgoing-args
Target Report Var(TARGET_ACCUMULATE_OUTGOING_ARGS) Init(1)
Reserve space for outgoing arguments in the function prologue.
-madjust-unroll
-Target Ignore
-Does nothing. Preserved for backward compatibility.
-
mb
Target Report RejectNegative InverseMask(LITTLE_ENDIAN)
Generate code in big endian mode.
@@ -245,10 +241,6 @@ minline-ic_invalidate
Target Report Var(TARGET_INLINE_IC_INVALIDATE)
inline code to invalidate instruction cache entries after setting up nested function trampolines.
-minvalid-symbols
-Target Report Mask(INVALID_SYMBOLS) Condition(SUPPORT_ANY_SH5)
-Assume symbols might be invalid.
-
misize
Target Report RejectNegative Mask(DUMPISIZE)
Annotate assembler instructions with estimated addresses.
@@ -279,10 +271,6 @@ mrenesas
Target Mask(HITACHI)
Follow Renesas (formerly Hitachi) / SuperH calling conventions.
-msoft-atomic
-Target Undocumented Alias(matomic-model=, soft-gusa, none)
-Deprecated. Use -matomic= instead to select the atomic model.
-
matomic-model=
Target Report RejectNegative Joined Var(sh_atomic_model_str)
Specify the model for atomic operations.
@@ -291,10 +279,6 @@ mtas
Target Report RejectNegative Var(TARGET_ENABLE_TAS)
Use tas.b instruction for __atomic_test_and_set.
-mspace
-Target RejectNegative Alias(Os)
-Deprecated. Use -Os instead.
-
multcost=
Target RejectNegative Joined UInteger Var(sh_multcost) Init(-1)
Cost to assume for a multiply insn.
diff --git a/gcc/config/sol2.h b/gcc/config/sol2.h
index 5160e1fda18..50f2b383a1b 100644
--- a/gcc/config/sol2.h
+++ b/gcc/config/sol2.h
@@ -166,21 +166,26 @@ along with GCC; see the file COPYING3. If not see
#define STARTFILE_CRTBEGIN_SPEC "crtbegin.o%s"
#endif
+#if ENABLE_VTABLE_VERIFY
#if SUPPORTS_INIT_PRIORITY
#define STARTFILE_VTV_SPEC \
"%{fvtable-verify=none:%s; \
fvtable-verify=preinit:vtv_start_preinit.o%s; \
fvtable-verify=std:vtv_start.o%s}"
-
#define ENDFILE_VTV_SPEC \
"%{fvtable-verify=none:%s; \
fvtable-verify=preinit:vtv_end_preinit.o%s; \
fvtable-verify=std:vtv_end.o%s}"
-#else
+#else /* !SUPPORTS_INIT_PRIORITY */
#define STARTFILE_VTV_SPEC \
- "%{fvtable-verify:%e-fvtable-verify is not supported in this configuration}"
+ "%{fvtable-verify=*: \
+ %e-fvtable-verify=%* is not supported in this configuration}"
#define ENDFILE_VTV_SPEC ""
-#endif
+#endif /* !SUPPORTS_INIT_PRIORITY */
+#else /* !ENABLE_VTABLE_VERIFY */
+#define STARTFILE_VTV_SPEC ""
+#define ENDFILE_VTV_SPEC ""
+#endif /* !ENABLE_VTABLE_VERIFY */
/* We don't use the standard svr4 STARTFILE_SPEC because it's wrong for us. */
#undef STARTFILE_SPEC