18 files changed, 881 insertions, 784 deletions
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 71cdca523df..c8e508cf0a0 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -355,6 +355,7 @@
 #define vec_vsx_ld __builtin_vec_vsx_ld
 #define vec_vsx_st __builtin_vec_vsx_st
 #define vec_xl __builtin_vec_vsx_ld
+#define vec_xl_be __builtin_vec_xl_be
 #define vec_xst __builtin_vec_vsx_st
 
 /* Note, xxsldi and xxpermdi were added as __builtin_vsx_<xxx> functions
@@ -449,6 +450,9 @@
 #define vec_insert_exp __builtin_vec_insert_exp
 #define vec_test_data_class __builtin_vec_test_data_class
 
+#define vec_extract_fp_from_shorth __builtin_vec_vextract_fp_from_shorth
+#define vec_extract_fp_from_shortl __builtin_vec_vextract_fp_from_shortl
+
 #define scalar_extract_exp __builtin_vec_scalar_extract_exp
 #define scalar_extract_sig __builtin_vec_scalar_extract_sig
 #define scalar_insert_exp __builtin_vec_scalar_insert_exp
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 91c56512308..4077afdadb6 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -36,10 +36,14 @@
    UNSPEC_VMULESB
    UNSPEC_VMULEUH
    UNSPEC_VMULESH
+   UNSPEC_VMULEUW
+   UNSPEC_VMULESW
    UNSPEC_VMULOUB
    UNSPEC_VMULOSB
    UNSPEC_VMULOUH
    UNSPEC_VMULOSH
+   UNSPEC_VMULOUW
+   UNSPEC_VMULOSW
    UNSPEC_VPKPX
    UNSPEC_VPACK_SIGN_SIGN_SAT
    UNSPEC_VPACK_SIGN_UNS_SAT
@@ -307,7 +311,7 @@
   for (i = 0; i < num_elements; i++)
     RTVEC_ELT (v, i) = constm1_rtx;
 
-  emit_insn (gen_vec_initv4si (dest, gen_rtx_PARALLEL (mode, v)));
+  emit_insn (gen_vec_initv4sisi (dest, gen_rtx_PARALLEL (mode, v)));
   emit_insn (gen_rtx_SET (dest, gen_rtx_ASHIFT (mode, dest, dest)));
   DONE;
 })
@@ -1538,6 +1542,41 @@
   "vmulosh %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
+(define_insn "altivec_vmuleuw"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+       (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+                     (match_operand:V4SI 2 "register_operand" "v")]
+                    UNSPEC_VMULEUW))]
+  "TARGET_P8_VECTOR"
+  "vmuleuw %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulouw"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+       (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+                     (match_operand:V4SI 2 "register_operand" "v")]
+                    UNSPEC_VMULOUW))]
+  "TARGET_P8_VECTOR"
+  "vmulouw %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulesw"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+       (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+                     (match_operand:V4SI 2 "register_operand" "v")]
+                    UNSPEC_VMULESW))]
+  "TARGET_P8_VECTOR"
+  "vmulesw %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulosw"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+       (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+                     (match_operand:V4SI 2 "register_operand" "v")]
+                    UNSPEC_VMULOSW))]
+  "TARGET_P8_VECTOR"
+  "vmulosw %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
 
 ;; Vector pack/unpack
 (define_insn "altivec_vpkpx"
@@ -2228,7 +2267,7 @@
   RTVEC_ELT (v, 2) = GEN_INT (mask_val);
   RTVEC_ELT (v, 3) = GEN_INT (mask_val);
 
-  emit_insn (gen_vec_initv4si (mask, gen_rtx_PARALLEL (V4SImode, v)));
+  emit_insn (gen_vec_initv4sisi (mask, gen_rtx_PARALLEL (V4SImode, v)));
   emit_insn (gen_vector_select_v4sf (operands[0], operands[1], operands[2],
 				     gen_lowpart (V4SFmode, mask)));
   DONE;
@@ -3370,7 +3409,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 :  0);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ?  7 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3406,7 +3445,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ?  6 : 17);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ?  7 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3442,7 +3481,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 :  8);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3478,7 +3517,7 @@
   RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17);
   RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
   DONE;
 }")
@@ -3719,7 +3758,7 @@
      = gen_rtx_CONST_INT (QImode, BYTES_BIG_ENDIAN ? 2 * i + 17 : 15 - 2 * i);
   }
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_altivec_vmulesb (even, operands[1], operands[2]));
   emit_insn (gen_altivec_vmulosb (odd, operands[1], operands[2]));
   emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], even, odd, mask));
@@ -3765,7 +3804,7 @@
       RTVEC_ELT (v, i + j * size)
 	= GEN_INT (i + (num_elements - 1 - j) * size);
 
-  emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
   emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
 	     operands[1], mask));
   DONE;
diff --git a/gcc/config/rs6000/paired.md b/gcc/config/rs6000/paired.md
index c9f95867c0f..b0aa329d7b8 100644
--- a/gcc/config/rs6000/paired.md
+++ b/gcc/config/rs6000/paired.md
@@ -377,7 +377,7 @@
   "ps_muls1 %0, %1, %2"
   [(set_attr "type" "fp")])
 
-(define_expand "vec_initv2sf"
+(define_expand "vec_initv2sfsf"
   [(match_operand:V2SF 0 "gpc_reg_operand" "=f")
    (match_operand 1 "" "")]
   "TARGET_PAIRED_FLOAT"
diff --git a/gcc/config/rs6000/ppc-auxv.h b/gcc/config/rs6000/ppc-auxv.h
index c7e2e0bfbf2..dcee28a8152 100644
--- a/gcc/config/rs6000/ppc-auxv.h
+++ b/gcc/config/rs6000/ppc-auxv.h
@@ -89,6 +89,8 @@
 #define PPC_FEATURE2_HTM_NOSC       0x01000000
 #define PPC_FEATURE2_ARCH_3_00      0x00800000
 #define PPC_FEATURE2_HAS_IEEE128    0x00400000
+#define PPC_FEATURE2_DARN           0x00200000
+#define PPC_FEATURE2_SCV            0x00100000
 
 
 /* Thread Control Block (TCB) offsets of the AT_PLATFORM, AT_HWCAP and
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index aa1c01b93dd..466f9131aa0 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -783,10 +783,8 @@
   (and (and (match_code "mem")
 	    (match_test "MEM_VOLATILE_P (op)"))
        (if_then_else (match_test "reload_completed")
-         (match_operand 0 "memory_operand")
-         (if_then_else (match_test "reload_in_progress")
-	   (match_test "strict_memory_address_p (mode, XEXP (op, 0))")
-	   (match_test "memory_address_p (mode, XEXP (op, 0))")))))
+	 (match_operand 0 "memory_operand")
+	 (match_test "memory_address_p (mode, XEXP (op, 0))"))))
 
 ;; Return 1 if the operand is an offsettable memory operand.
 (define_predicate "offsettable_mem_operand"
@@ -1142,7 +1140,7 @@
       if (! volatile_ok && MEM_VOLATILE_P (op))
 	return 0;
 
-      if (reload_in_progress || lra_in_progress || reload_completed)
+      if (lra_in_progress || reload_completed)
 	return indexed_or_indirect_address (addr, vmode);
       else
 	return memory_address_addr_space_p (vmode, addr, MEM_ADDR_SPACE (op));
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index bf2c90b49fb..850164a0987 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1031,10 +1031,14 @@ BU_ALTIVEC_2 (VMULEUB,	      "vmuleub",	CONST,	vec_widen_umult_even_v16qi)
 BU_ALTIVEC_2 (VMULESB,	      "vmulesb",	CONST,	vec_widen_smult_even_v16qi)
 BU_ALTIVEC_2 (VMULEUH,	      "vmuleuh",	CONST,	vec_widen_umult_even_v8hi)
 BU_ALTIVEC_2 (VMULESH,	      "vmulesh",	CONST,	vec_widen_smult_even_v8hi)
+BU_ALTIVEC_2 (VMULEUW,	      "vmuleuw",	CONST,  altivec_vmuleuw)
+BU_ALTIVEC_2 (VMULESW,	      "vmulesw",	CONST,  altivec_vmulesw)
 BU_ALTIVEC_2 (VMULOUB,	      "vmuloub",	CONST,	vec_widen_umult_odd_v16qi)
 BU_ALTIVEC_2 (VMULOSB,	      "vmulosb",	CONST,	vec_widen_smult_odd_v16qi)
 BU_ALTIVEC_2 (VMULOUH,	      "vmulouh",	CONST,	vec_widen_umult_odd_v8hi)
 BU_ALTIVEC_2 (VMULOSH,	      "vmulosh",	CONST,	vec_widen_smult_odd_v8hi)
+BU_ALTIVEC_2 (VMULOUW,	      "vmulouw",	CONST,  altivec_vmulouw)
+BU_ALTIVEC_2 (VMULOSW,	      "vmulosw",	CONST,  altivec_vmulosw)
 BU_ALTIVEC_2 (VNOR,	      "vnor",		CONST,	norv4si3)
 BU_ALTIVEC_2 (VOR,	      "vor",		CONST,	iorv4si3)
 BU_ALTIVEC_2 (VPKUHUM,	      "vpkuhum",	CONST,	altivec_vpkuhum)
@@ -1353,12 +1357,16 @@ BU_ALTIVEC_OVERLOAD_2 (VMRGLH,	   "vmrglh")
 BU_ALTIVEC_OVERLOAD_2 (VMRGLW,	   "vmrglw")
 BU_ALTIVEC_OVERLOAD_2 (VMULESB,	   "vmulesb")
 BU_ALTIVEC_OVERLOAD_2 (VMULESH,	   "vmulesh")
+BU_ALTIVEC_OVERLOAD_2 (VMULESW,	   "vmulesw")
 BU_ALTIVEC_OVERLOAD_2 (VMULEUB,	   "vmuleub")
 BU_ALTIVEC_OVERLOAD_2 (VMULEUH,	   "vmuleuh")
+BU_ALTIVEC_OVERLOAD_2 (VMULEUW,	   "vmuleuw")
 BU_ALTIVEC_OVERLOAD_2 (VMULOSB,	   "vmulosb")
 BU_ALTIVEC_OVERLOAD_2 (VMULOSH,	   "vmulosh")
+BU_ALTIVEC_OVERLOAD_2 (VMULOSW,	   "vmulosw")
 BU_ALTIVEC_OVERLOAD_2 (VMULOUB,	   "vmuloub")
 BU_ALTIVEC_OVERLOAD_2 (VMULOUH,	   "vmulouh")
+BU_ALTIVEC_OVERLOAD_2 (VMULOUW,	   "vmulouw")
 BU_ALTIVEC_OVERLOAD_2 (VPKSHSS,	   "vpkshss")
 BU_ALTIVEC_OVERLOAD_2 (VPKSHUS,	   "vpkshus")
 BU_ALTIVEC_OVERLOAD_2 (VPKSWSS,	   "vpkswss")
@@ -1727,6 +1735,14 @@ BU_VSX_X (LXVW4X_V4SF,	      "lxvw4x_v4sf",	MEM)
 BU_VSX_X (LXVW4X_V4SI,        "lxvw4x_v4si",	MEM)
 BU_VSX_X (LXVW4X_V8HI,        "lxvw4x_v8hi",	MEM)
 BU_VSX_X (LXVW4X_V16QI,	      "lxvw4x_v16qi",	MEM)
+
+BU_VSX_X (XL_BE_V16QI, "xl_be_v16qi", MEM)
+BU_VSX_X (XL_BE_V8HI, "xl_be_v8hi", MEM)
+BU_VSX_X (XL_BE_V4SI, "xl_be_v4si", MEM)
+BU_VSX_X (XL_BE_V2DI, "xl_be_v2di", MEM)
+BU_VSX_X (XL_BE_V4SF, "xl_be_v4sf", MEM)
+BU_VSX_X (XL_BE_V2DF, "xl_be_v2df", MEM)
+
 BU_VSX_X (STXSDX,	      "stxsdx",		MEM)
 BU_VSX_X (STXVD2X_V1TI,	      "stxvd2x_v1ti",	MEM)
 BU_VSX_X (STXVD2X_V2DF,	      "stxvd2x_v2df",	MEM)
@@ -1827,6 +1843,7 @@ BU_VSX_OVERLOAD_1 (VUNSIGNEDO,  "vunsignedo")
 BU_VSX_OVERLOAD_X (LD,	     "ld")
 BU_VSX_OVERLOAD_X (ST,	     "st")
 BU_VSX_OVERLOAD_X (XL,	     "xl")
+BU_VSX_OVERLOAD_X (XL_BE,    "xl_be")
 BU_VSX_OVERLOAD_X (XST,	     "xst")
 
 /* 2 argument CMPB instructions added in ISA 2.05. */
@@ -2058,6 +2075,9 @@ BU_P9V_OVERLOAD_1 (VSTDCNSP,	"scalar_test_neg_sp")
 
 BU_P9V_OVERLOAD_1 (REVB,	"revb")
 
+BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth")
+BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl")
+
 /* ISA 3.0 vector scalar overloaded 2 argument functions.  */
 BU_P9V_OVERLOAD_2 (VSIEDP,	"scalar_insert_exp")
 
@@ -2076,6 +2096,8 @@ BU_P9V_VSX_1 (VEEDP, "extract_exp_dp", CONST, xvxexpdp)
 BU_P9V_VSX_1 (VEESP, "extract_exp_sp", CONST, xvxexpsp)
 BU_P9V_VSX_1 (VESDP, "extract_sig_dp", CONST, xvxsigdp)
 BU_P9V_VSX_1 (VESSP, "extract_sig_sp", CONST, xvxsigsp)
+BU_P9V_VSX_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth", CONST, vextract_fp_from_shorth)
+BU_P9V_VSX_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl", CONST, vextract_fp_from_shortl)
 
 /* 2 argument vsx vector functions added in ISA 3.0 (power9).  */
 BU_P9V_VSX_2 (VIEDP, "insert_exp_dp", CONST, xviexpdp)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 2a361260759..11febbb4d46 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -575,40 +575,6 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
      2. If TARGET_ALTIVEC is turned off.  */
   if ((flags & OPTION_MASK_CRYPTO) != 0)
     rs6000_define_or_undefine_macro (define_p, "__CRYPTO__");
-  /* Note that the OPTION_MASK_UPPER_REGS_DF flag is automatically
-     turned on in the following conditions:
-     1. If TARGET_UPPER_REGS is explicitly turned on and
-	TARGET_VSX is turned on and OPTION_MASK_UPPER_REGS_DF is not
-	explicitly turned off.  Hereafter, the
-	OPTION_MASK_UPPER_REGS_DF flag is considered to have been
-	explicitly set.
-     Note that the OPTION_MASK_UPPER_REGS_DF flag is automatically
-     turned off in the following conditions:
-     1. If TARGET_UPPER_REGS is explicitly turned off and TARGET_VSX
-	is turned on and OPTION_MASK_UPPER_REGS_DF is not explicitly
-	turned on.  Hereafter, the OPTION_MASK_UPPER_REGS_DF flag is
-	considered to have been explicitly cleared.
-     2. If TARGET_UPPER_REGS_DF is turned on but TARGET_VSX is turned
-	off.  */
-  if ((flags & OPTION_MASK_UPPER_REGS_DF) != 0)
-    rs6000_define_or_undefine_macro (define_p, "__UPPER_REGS_DF__");
-  /* Note that the OPTION_MASK_UPPER_REGS_SF flag is automatically
-     turned on in the following conditions:
-     1. If TARGET_UPPER_REGS is explicitly turned on and
-	TARGET_P8_VECTOR is on and OPTION_MASK_UPPER_REGS_SF is not
-	turned off explicitly.  Hereafter, the
-	OPTION_MASK_UPPER_REGS_SF flag is considered to have been
-	explicitly set.
-     Note that the OPTION_MASK_UPPER_REGS_SF flag is automatically
-     turned off in the following conditions:
-     1. If TARGET_UPPER_REGS is explicitly turned off and
-	TARGET_P8_VECTOR is on and OPTION_MASK_UPPER_REGS_SF is not
-	turned off explicitly.  Hereafter, the
-	OPTION_MASK_UPPER_REGS_SF flag is considered to have been
-	explicitly cleared.
-     2. If TARGET_P8_VECTOR is off.  */
-  if ((flags & OPTION_MASK_UPPER_REGS_SF) != 0)
-    rs6000_define_or_undefine_macro (define_p, "__UPPER_REGS_SF__");
 
   /* options from the builtin masks.  */
   /* Note that RS6000_BTM_PAIRED is enabled only if
@@ -2232,9 +2198,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
   { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH,
     RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
-  { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH,
+  { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESW,
     RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
-  { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUH,
+  { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUW,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
     RS6000_BTI_unsigned_V4SI, 0 },
   { ALTIVEC_BUILTIN_VEC_VMULEUB, ALTIVEC_BUILTIN_VMULEUB,
@@ -2251,9 +2217,9 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH,
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
-  { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH,
+  { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSW,
     RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
-  { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH,
+  { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUW,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
     RS6000_BTI_unsigned_V4SI, 0 },
   { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH,
@@ -3111,6 +3077,26 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     ~RS6000_BTI_unsigned_V16QI, 0 },
   { VSX_BUILTIN_VEC_XL, VSX_BUILTIN_LD_ELEMREV_V16QI,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V16QI,
+    RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI,
+    RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V8HI,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI,
+    RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SI,
+    RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI,
+    RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DI,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V4SF,
+    RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 },
+  { VSX_BUILTIN_VEC_XL_BE, VSX_BUILTIN_XL_BE_V2DF,
+    RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 },
   { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
     RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
   { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR,
@@ -5184,6 +5170,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B,
     RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 },
 
+  { P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTH, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTH,
+    RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 },
+  { P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTL, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTL,
+    RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
   { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUBLX,
     RS6000_BTI_INTQI, RS6000_BTI_UINTSI,
     RS6000_BTI_V16QI, 0 },
@@ -5881,6 +5872,12 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
       tree arg1 = (*arglist)[1];
       tree arg1_type = TREE_TYPE (arg1);
 
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
       /* Power9 instructions provide the most efficient implementation of
 	 ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode
 	 or SFmode or DFmode.  */
@@ -5890,12 +5887,6 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode)
 	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode))
 	{
-	  /* Both arguments must be vectors and the types must be compatible.  */
-	  if (TREE_CODE (arg0_type) != VECTOR_TYPE)
-	    goto bad;
-	  if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
-	    goto bad;
-
 	  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
 	    {
 	      /* vec_cmpneq (va, vb) == vec_nor (vec_cmpeq (va, vb),
@@ -5960,8 +5951,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	 __int128) and the types must be compatible.  */
       if (TREE_CODE (arg0_type) != VECTOR_TYPE)
 	goto bad;
-      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) ||
-	  !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
 	goto bad;
 
       switch (TYPE_MODE (TREE_TYPE (arg0_type)))
@@ -6043,8 +6034,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	 __int128) and the types must be compatible.  */
       if (TREE_CODE (arg0_type) != VECTOR_TYPE)
 	goto bad;
-      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type) ||
-	  !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
 	goto bad;
 
       switch (TYPE_MODE (TREE_TYPE (arg0_type)))
@@ -6493,6 +6484,9 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 
       /* Strip qualifiers like "const" from the pointer arg.  */
       tree arg1_type = TREE_TYPE (arg1);
+      if (!POINTER_TYPE_P (arg1_type) && TREE_CODE (arg1_type) != ARRAY_TYPE)
+	goto bad;
+
       tree inner_type = TREE_TYPE (arg1_type);
       if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
 	{
@@ -6581,11 +6575,6 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	      arg2 = build1 (ADDR_EXPR, arg2_type, arg2_elt0);
 	    }
 
-	  tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type,
-				       arg2, arg1);
-	  tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type, addr,
-					  build_int_cst (arg2_type, -16));
-
 	  /* Find the built-in to make sure a compatible one exists; if not
 	     we fall back to default handling to get the error message.  */
 	  for (desc = altivec_overloaded_builtins;
@@ -6598,6 +6587,12 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 		&& rs6000_builtin_type_compatible (TREE_TYPE (arg2),
 						   desc->op3))
 	      {
+		tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type,
+					     arg2, arg1);
+		tree aligned
+		  = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type,
+				     addr, build_int_cst (arg2_type, -16));
+
 		tree arg0_type = TREE_TYPE (arg0);
 		if (TYPE_MODE (arg0_type) == V2DImode)
 		  /* Type-based aliasing analysis thinks vector long
@@ -6723,8 +6718,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	    overloaded_code = P6_BUILTIN_CMPB_32;
 	  }
 
-	while (desc->code && desc->code == fcode &&
-	       desc->overloaded_code != overloaded_code)
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
 	  desc++;
 
 	if (desc->code && (desc->code == fcode)
@@ -6770,8 +6765,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 	    else
 	      overloaded_code = P9V_BUILTIN_VSIEDP;
 	  }
-	while (desc->code && desc->code == fcode &&
-	       desc->overloaded_code != overloaded_code)
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
 	  desc++;
 	if (desc->code && (desc->code == fcode)
 	    && rs6000_builtin_type_compatible (types[0], desc->op1)
@@ -6807,15 +6802,15 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
     if (unsupported_builtin)
       {
 	const char *name = rs6000_overloaded_builtin_name (fcode);
-	error ("Builtin function %s not supported in this compiler configuration",
+	error ("builtin function %s not supported in this compiler configuration",
 	       name);
 	return error_mark_node;
       }
   }
  bad:
-    {
-      const char *name = rs6000_overloaded_builtin_name (fcode);
-      error ("invalid parameter combination for AltiVec intrinsic %s", name);
-      return error_mark_node;
-    }
+  {
+    const char *name = rs6000_overloaded_builtin_name (fcode);
+    error ("invalid parameter combination for AltiVec intrinsic %s", name);
+    return error_mark_node;
+  }
 }
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index cd5c70688d8..190f9123fa0 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -44,9 +44,7 @@
 #define ISA_2_6_MASKS_SERVER	(ISA_2_5_MASKS_SERVER			\
 				 | OPTION_MASK_POPCNTD			\
 				 | OPTION_MASK_ALTIVEC			\
-				 | OPTION_MASK_VSX			\
-				 | OPTION_MASK_UPPER_REGS_DI		\
-				 | OPTION_MASK_UPPER_REGS_DF)
+				 | OPTION_MASK_VSX)
 
 /* For now, don't provide an embedded version of ISA 2.07.  */
 #define ISA_2_7_MASKS_SERVER	(ISA_2_6_MASKS_SERVER			\
@@ -57,9 +55,7 @@
 				 | OPTION_MASK_EFFICIENT_UNALIGNED_VSX	\
 				 | OPTION_MASK_HTM			\
 				 | OPTION_MASK_QUAD_MEMORY		\
-  				 | OPTION_MASK_QUAD_MEMORY_ATOMIC	\
-				 | OPTION_MASK_UPPER_REGS_SF		\
-				 | OPTION_MASK_VSX_SMALL_INTEGER)
+				 | OPTION_MASK_QUAD_MEMORY_ATOMIC)
 
 /* Add ISEL back into ISA 3.0, since it is supposed to be a win.  Do not add
    FLOAT128_HW here until we are ready to make -mfloat128 on by default.  */
@@ -78,11 +74,7 @@
 #define ISA_3_0_MASKS_IEEE	(OPTION_MASK_VSX			\
 				 | OPTION_MASK_P8_VECTOR		\
 				 | OPTION_MASK_P9_VECTOR		\
-				 | OPTION_MASK_DIRECT_MOVE		\
-				 | OPTION_MASK_UPPER_REGS_DI		\
-				 | OPTION_MASK_UPPER_REGS_DF		\
-				 | OPTION_MASK_UPPER_REGS_SF		\
-				 | OPTION_MASK_VSX_SMALL_INTEGER)
+				 | OPTION_MASK_DIRECT_MOVE)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
 #define OTHER_P9_VECTOR_MASKS	(OPTION_MASK_FLOAT128_HW		\
@@ -94,8 +86,7 @@
 #define OTHER_P8_VECTOR_MASKS	(OTHER_P9_VECTOR_MASKS			\
 				 | OPTION_MASK_P9_VECTOR		\
 				 | OPTION_MASK_DIRECT_MOVE		\
-				 | OPTION_MASK_CRYPTO			\
-				 | OPTION_MASK_UPPER_REGS_SF)		\
+				 | OPTION_MASK_CRYPTO)
 
 /* Flags that need to be turned off if -mno-vsx.  */
 #define OTHER_VSX_VECTOR_MASKS	(OTHER_P8_VECTOR_MASKS			\
@@ -103,9 +94,6 @@
 				 | OPTION_MASK_FLOAT128_KEYWORD		\
 				 | OPTION_MASK_FLOAT128_TYPE		\
 				 | OPTION_MASK_P8_VECTOR		\
-				 | OPTION_MASK_UPPER_REGS_DI		\
-				 | OPTION_MASK_UPPER_REGS_DF		\
-				 | OPTION_MASK_VSX_SMALL_INTEGER	\
 				 | OPTION_MASK_VSX_TIMODE)
 
 #define POWERPC_7400_MASK	(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
@@ -135,7 +123,6 @@
 				 | OPTION_MASK_FPRND			\
 				 | OPTION_MASK_HTM			\
 				 | OPTION_MASK_ISEL			\
-				 | OPTION_MASK_LRA			\
 				 | OPTION_MASK_MFCRF			\
 				 | OPTION_MASK_MFPGPR			\
 				 | OPTION_MASK_MODULO			\
@@ -160,11 +147,7 @@
 				 | OPTION_MASK_SOFT_FLOAT		\
 				 | OPTION_MASK_STRICT_ALIGN_OPTIONAL	\
 				 | OPTION_MASK_TOC_FUSION		\
-				 | OPTION_MASK_UPPER_REGS_DI		\
-				 | OPTION_MASK_UPPER_REGS_DF		\
-				 | OPTION_MASK_UPPER_REGS_SF		\
 				 | OPTION_MASK_VSX			\
-				 | OPTION_MASK_VSX_SMALL_INTEGER	\
 				 | OPTION_MASK_VSX_TIMODE)
 
 #endif
@@ -251,11 +234,7 @@ RS6000_CPU ("power6", PROCESSOR_POWER6, MASK_POWERPC64 | MASK_PPC_GPOPT
 RS6000_CPU ("power6x", PROCESSOR_POWER6, MASK_POWERPC64 | MASK_PPC_GPOPT
 	    | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND
 	    | MASK_CMPB | MASK_DFP | MASK_MFPGPR | MASK_RECIP_PRECISION)
-RS6000_CPU ("power7", PROCESSOR_POWER7,   /* Don't add MASK_ISEL by default */
-	    POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
-	    | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
-	    | MASK_VSX | MASK_RECIP_PRECISION | OPTION_MASK_UPPER_REGS_DF
-	    | OPTION_MASK_UPPER_REGS_DI)
+RS6000_CPU ("power7", PROCESSOR_POWER7, MASK_POWERPC64 | ISA_2_6_MASKS_SERVER)
 RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER)
 RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64 | ISA_3_0_MASKS_SERVER)
 RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0)
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index aeec9b2f1c2..144bdb26fa4 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -151,10 +151,10 @@ extern rtx rs6000_longcall_ref (rtx);
 extern void rs6000_fatal_bad_address (rtx);
 extern rtx create_TOC_reference (rtx, rtx);
 extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_le_vsx_permute (rtx, rtx, machine_mode);
 extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode);
 extern bool valid_sf_si_move (rtx, rtx, machine_mode);
 extern void rs6000_emit_move (rtx, rtx, machine_mode);
-extern rtx rs6000_secondary_memory_needed_rtx (machine_mode);
 extern machine_mode rs6000_secondary_memory_needed_mode (machine_mode);
 extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode,
 						    int, int, int, int *);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 988926b8d59..74158cdd075 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -143,10 +143,6 @@ typedef struct GTY(()) machine_function
   /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
      varargs save area.  */
   HOST_WIDE_INT varargs_save_offset;
-  /* Temporary stack slot to use for SDmode copies.  This slot is
-     64-bits wide and is allocated early enough so that the offset
-     does not overflow the 16-bit load/store offset field.  */
-  rtx sdmode_stack_slot;
   /* Alternative internal arg pointer for -fsplit-stack.  */
   rtx split_stack_arg_pointer;
   bool split_stack_argp_used;
@@ -379,7 +375,9 @@ static const struct
   { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
   { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
   { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
-  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
+  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 },
+  { "darn",		PPC_FEATURE2_DARN,		1 },
+  { "scv",		PPC_FEATURE2_SCV,		1 }
 };
 
 /* On PowerPC, we have a limited number of target clones that we care about
@@ -437,7 +435,7 @@ enum rs6000_reg_type {
   ALTIVEC_REG_TYPE,
   FPR_REG_TYPE,
   SPR_REG_TYPE,
-  CR_REG_TYPE,
+  CR_REG_TYPE
 };
 
 /* Map register class to register type.  */
@@ -1872,12 +1870,6 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_BUILTIN_RECIPROCAL
 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
 
-#undef TARGET_EXPAND_TO_RTL_HOOK
-#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
-
-#undef TARGET_INSTANTIATE_DECLS
-#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
-
 #undef TARGET_SECONDARY_RELOAD
 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
 
@@ -1887,9 +1879,6 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
 
-#undef TARGET_LRA_P
-#define TARGET_LRA_P rs6000_lra_p
-
 #undef TARGET_COMPUTE_PRESSURE_CLASSES
 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
 
@@ -2104,14 +2093,11 @@ rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
 	  if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
 	    return 1;
 
-	  if (TARGET_VSX_SMALL_INTEGER)
-	    {
-	      if (mode == SImode)
-		return 1;
+	  if (TARGET_P8_VECTOR && (mode == SImode))
+	    return 1;
 
-	      if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
-		return 1;
-	    }
+	  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
+	    return 1;
 	}
 
       if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
@@ -2793,8 +2779,6 @@ rs6000_debug_reg_global (void)
   if (TARGET_LINK_STACK)
     fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
 
-  fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");
-
   if (TARGET_P8_FUSION)
     {
       char options[80];
@@ -2907,9 +2891,7 @@ rs6000_setup_reg_addr_masks (void)
 		  && !VECTOR_MODE_P (m2)
 		  && !FLOAT128_VECTOR_P (m2)
 		  && !complex_p
-		  && !small_int_vsx_p
-		  && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
-		  && (m2 != SFmode || !TARGET_UPPER_REGS_SF))
+		  && !small_int_vsx_p)
 		{
 		  addr_mask |= RELOAD_REG_PRE_INCDEC;
 
@@ -3218,22 +3200,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
       rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
       rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;	/* V2DFmode  */
       rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;	/* V4SFmode  */
+      rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;	/* DFmode  */
+      rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;	/* DFmode  */
+      rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;	/* DImode  */
 
       if (TARGET_VSX_TIMODE)
 	rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS;	/* TImode  */
-
-      if (TARGET_UPPER_REGS_DF)					/* DFmode  */
-	{
-	  rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
-	  rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
-	}
-      else
-	rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
-
-      if (TARGET_UPPER_REGS_DI)					/* DImode  */
-	rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS;
-      else
-	rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS;
     }
 
   /* Add conditional constraints based on various options, to allow us to
@@ -3263,7 +3235,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
       rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
     }
 
-  if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF)			/* SFmode  */
+  if (TARGET_P8_VECTOR)						/* SFmode  */
     {
       rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
       rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
@@ -3303,7 +3275,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
     rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
 
   /* Support small integers in VSX registers.  */
-  if (TARGET_VSX_SMALL_INTEGER)
+  if (TARGET_P8_VECTOR)
     {
       rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
       rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
@@ -3458,18 +3430,14 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 	    }
 	}
 
-      if (TARGET_UPPER_REGS_DF)
-	reg_addr[DFmode].scalar_in_vmx_p = true;
-
-      if (TARGET_UPPER_REGS_DI)
-	reg_addr[DImode].scalar_in_vmx_p = true;
+      reg_addr[DFmode].scalar_in_vmx_p = true;
+      reg_addr[DImode].scalar_in_vmx_p = true;
 
-      if (TARGET_UPPER_REGS_SF)
-	reg_addr[SFmode].scalar_in_vmx_p = true;
-
-      if (TARGET_VSX_SMALL_INTEGER)
+      if (TARGET_P8_VECTOR)
 	{
+	  reg_addr[SFmode].scalar_in_vmx_p = true;
 	  reg_addr[SImode].scalar_in_vmx_p = true;
+
 	  if (TARGET_P9_VECTOR)
 	    {
 	      reg_addr[HImode].scalar_in_vmx_p = true;
@@ -4214,6 +4182,10 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_altivec_element_order = 0;
     }
 
+  if (!rs6000_fold_gimple)
+     fprintf (stderr,
+	      "gimple folding of rs6000 builtins has been disabled.\n");
+
   /* Add some warnings for VSX.  */
   if (TARGET_VSX)
     {
@@ -4277,20 +4249,12 @@ rs6000_option_override_internal (bool global_init_p)
 	{
 	  if (cpu_index == PROCESSOR_POWER9)
 	    {
-	      /* legacy behavior: allow -mcpu-power9 with certain
+	      /* legacy behavior: allow -mcpu=power9 with certain
 		 capabilities explicitly disabled.  */
 	      rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
-	      /* However, reject this automatic fix if certain
-		 capabilities required for TARGET_P9_MINMAX support
-		 have been explicitly disabled.  */
-	      if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
-		    | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags)
-		  != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF
-		      | OPTION_MASK_UPPER_REGS_DF))
-		error ("-mpower9-minmax incompatible with explicitly disabled options");
-		}
+	    }
 	  else
-	    error ("Power9 target option is incompatible with -mcpu=<xxx> for "
+	    error ("power9 target option is incompatible with -mcpu=<xxx> for "
 		   "<xxx> less than power9");
 	}
       else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
@@ -4374,73 +4338,6 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_isa_flags &= ~OPTION_MASK_DFP;
     }
 
-  /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di,
-     and -mupper-regs-sf, depending on the cpu, unless the user explicitly also
-     set the individual option.  */
-  if (TARGET_UPPER_REGS > 0)
-    {
-      if (TARGET_VSX
-	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
-	{
-	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
-	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
-	}
-      if (TARGET_VSX
-	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
-	{
-	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI;
-	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
-	}
-      if (TARGET_P8_VECTOR
-	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
-	{
-	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
-	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
-	}
-    }
-  else if (TARGET_UPPER_REGS == 0)
-    {
-      if (TARGET_VSX
-	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
-	{
-	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
-	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
-	}
-      if (TARGET_VSX
-	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI))
-	{
-	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
-	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI;
-	}
-      if (TARGET_P8_VECTOR
-	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
-	{
-	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
-	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
-	}
-    }
-
-  if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
-    {
-      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
-	error ("-mupper-regs-df requires -mvsx");
-      rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
-    }
-
-  if (TARGET_UPPER_REGS_DI && !TARGET_VSX)
-    {
-      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)
-	error ("-mupper-regs-di requires -mvsx");
-      rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI;
-    }
-
-  if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
-    {
-      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
-	error ("-mupper-regs-sf requires -mpower8-vector");
-      rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
-    }
-
   /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
      silently turn off quad memory mode.  */
   if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
@@ -4649,53 +4546,10 @@ rs6000_option_override_internal (bool global_init_p)
 	}
     }
 
-  if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
-    {
-      /* We prefer to not mention undocumented options in
-	 error messages.  However, if users have managed to select
-	 power9-dform without selecting upper-regs-df, they
-	 already know about undocumented flags.  */
-      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
-	error ("-mpower9-dform requires -mupper-regs-df");
-      rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
-    }
-
-  if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF)
-    {
-      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
-	error ("-mpower9-dform requires -mupper-regs-sf");
-      rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR;
-    }
-
-  /* Enable LRA by default.  */
-  if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0)
-    rs6000_isa_flags |= OPTION_MASK_LRA;
-
-  /* There have been bugs with -mvsx-timode that don't show up with -mlra,
-     but do show up with -mno-lra.  Given -mlra will become the default once
-     PR 69847 is fixed, turn off the options with problems by default if
-     -mno-lra was used, and warn if the user explicitly asked for the option.
-
-     Enable -mpower9-dform-vector by default if LRA and other power9 options.
-     Enable -mvsx-timode by default if LRA and VSX.  */
-  if (!TARGET_LRA)
-    {
-      if (TARGET_VSX_TIMODE)
-	{
-	  if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0)
-	    warning (0, "-mvsx-timode might need -mlra");
-
-	  else
-	    rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
-	}
-    }
-
-  else
-    {
-      if (TARGET_VSX && !TARGET_VSX_TIMODE
-	  && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
-	rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
-    }
+  /* Enable -mvsx-timode by default if VSX.  */
+  if (TARGET_VSX && !TARGET_VSX_TIMODE
+      && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0)
+    rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE;
 
   /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
      support. If we only have ISA 2.06 support, and the user did not specify
@@ -4737,20 +4591,6 @@ rs6000_option_override_internal (bool global_init_p)
 	}
     }
 
-  /* Check whether we should allow small integers into VSX registers.  We
-     require direct move to prevent the register allocator from having to move
-     variables through memory to do moves.  SImode can be used on ISA 2.07,
-     while HImode and QImode require ISA 3.0.  */
-  if (TARGET_VSX_SMALL_INTEGER
-      && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI))
-    {
-      if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER)
-	error ("-mvsx-small-integer requires -mpower8-vector, "
-	       "-mupper-regs-di, and -mdirect-move");
-
-      rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER;
-    }
-
   /* Set long double size before the IEEE 128-bit tests.  */
   if (!global_options_set.x_rs6000_long_double_type_size)
     {
@@ -5757,7 +5597,7 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
 	    if (TARGET_P9_VECTOR)
 	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
 	    else
-	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11;
+	      return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
 	  }
 	else
 	  /* V2DFmode doesn't need a direct move.  */
@@ -7443,8 +7283,7 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
       else if (mode == V2DImode)
 	insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
 
-      else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
-	       && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
+      else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
 	{
 	  if (mode == V4SImode)
 	    insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
@@ -8785,14 +8624,6 @@ legitimate_indexed_address_p (rtx x, int strict)
   op0 = XEXP (x, 0);
   op1 = XEXP (x, 1);
 
-  /* Recognize the rtl generated by reload which we know will later be
-     replaced with proper base and index regs.  */
-  if (!strict
-      && reload_in_progress
-      && (REG_P (op0) || GET_CODE (op0) == PLUS)
-      && REG_P (op1))
-    return true;
-
   return (REG_P (op0) && REG_P (op1)
 	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
 	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
@@ -10036,9 +9867,7 @@ rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
 	   ret ? "true" : "false",
 	   GET_MODE_NAME (mode),
 	   reg_ok_strict,
-	   (reload_completed
-	    ? "after"
-	    : (reload_in_progress ? "progress" : "before")),
+	   (reload_completed ? "after" : "before"),
 	   GET_RTX_NAME (GET_CODE (x)));
   debug_rtx (x);
 
@@ -10440,9 +10269,6 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
 static void
 rs6000_eliminate_indexed_memrefs (rtx operands[2])
 {
-  if (reload_in_progress)
-    return;
-
   if (GET_CODE (operands[0]) == MEM
       && GET_CODE (XEXP (operands[0], 0)) != REG
       && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
@@ -10501,19 +10327,30 @@ rs6000_const_vec (machine_mode mode)
   return v;
 }
 
-/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
-   for a VSX load or store operation.  */
-rtx
-rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
+/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
+   store operation.  */
+void
+rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
 {
-  /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
-     128-bit integers if they are allowed in VSX registers.  */
-  if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
-    return gen_rtx_ROTATE (mode, source, GEN_INT (64));
+  /* Scalar permutations are easier to express in integer modes rather than
+     floating-point modes, so cast them here.  We use V1TImode instead
+     of TImode to ensure that the values don't go through GPRs.  */
+  if (FLOAT128_VECTOR_P (mode))
+    {
+      dest = gen_lowpart (V1TImode, dest);
+      source = gen_lowpart (V1TImode, source);
+      mode = V1TImode;
+    }
+
+  /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
+     scalar.  */
+  if (mode == TImode || mode == V1TImode)
+    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
+						  GEN_INT (64))));
   else
     {
       rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
-      return gen_rtx_VEC_SELECT (mode, source, par);
+      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
     }
 }
 
@@ -10523,8 +10360,6 @@ rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
 void
 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
 {
-  rtx tmp, permute_mem, permute_reg;
-
   /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
      V1TImode).  */
   if (mode == TImode || mode == V1TImode)
@@ -10534,11 +10369,9 @@ rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
       source = adjust_address (source, V2DImode, 0);
     }
 
-  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
-  permute_mem = rs6000_gen_le_vsx_permute (source, mode);
-  permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
-  emit_insn (gen_rtx_SET (tmp, permute_mem));
-  emit_insn (gen_rtx_SET (dest, permute_reg));
+  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+  rs6000_emit_le_vsx_permute (tmp, source, mode);
+  rs6000_emit_le_vsx_permute (dest, tmp, mode);
 }
 
 /* Emit a little-endian store to vector memory location DEST from VSX
@@ -10547,12 +10380,10 @@ rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
 void
 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
 {
-  rtx tmp, permute_src, permute_tmp;
-
-  /* This should never be called during or after reload, because it does
+  /* This should never be called during or after LRA, because it does
      not re-permute the source register.  It is intended only for use
      during expand.  */
-  gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
+  gcc_assert (!lra_in_progress && !reload_completed);
 
   /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
      V1TImode).  */
@@ -10563,11 +10394,9 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
       source = gen_lowpart (V2DImode, source);
     }
 
-  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
-  permute_src = rs6000_gen_le_vsx_permute (source, mode);
-  permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
-  emit_insn (gen_rtx_SET (tmp, permute_src));
-  emit_insn (gen_rtx_SET (dest, permute_tmp));
+  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+  rs6000_emit_le_vsx_permute (tmp, source, mode);
+  rs6000_emit_le_vsx_permute (dest, tmp, mode);
 }
 
 /* Emit a sequence representing a little-endian VSX load or store,
@@ -10646,8 +10475,7 @@ valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
 static bool
 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
 {
-  if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed
-      && !lra_in_progress
+  if (TARGET_DIRECT_MOVE_64BIT && !lra_in_progress && !reload_completed
       && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
       && SUBREG_P (source) && sf_subreg_operand (source, mode))
     {
@@ -10681,10 +10509,10 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
   if (TARGET_DEBUG_ADDR)
     {
       fprintf (stderr,
-	       "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
+	       "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
 	       "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
 	       GET_MODE_NAME (mode),
-	       reload_in_progress,
+	       lra_in_progress,
 	       reload_completed,
 	       can_create_pseudo_p ());
       debug_rtx (dest);
@@ -10758,12 +10586,6 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
       operands[1] = tmp;
     }
 
-  /* Handle the case where reload calls us with an invalid address.  */
-  if (reload_in_progress && mode == Pmode
-      && (! general_operand (operands[1], mode)
-	  || ! nonimmediate_operand (operands[0], mode)))
-    goto emit_set;
-
   /* 128-bit constant floating-point values on Darwin should really be loaded
      as two parts.  However, this premature splitting is a problem when DFmode
      values can go into Altivec registers.  */
@@ -10781,11 +10603,6 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
       return;
     }
 
-  if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
-    cfun->machine->sdmode_stack_slot =
-      eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
-
-
   /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
      p1:SD) if p1 is not of floating point class and p0 is spilled as
      we can have no analogous movsd_store for this.  */
@@ -10895,57 +10712,6 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
       return;
     }
 
-  if (reload_in_progress
-      && mode == SDmode
-      && cfun->machine->sdmode_stack_slot != NULL_RTX
-      && MEM_P (operands[0])
-      && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
-      && REG_P (operands[1]))
-    {
-      if (FP_REGNO_P (REGNO (operands[1])))
-	{
-	  rtx mem = adjust_address_nv (operands[0], DDmode, 0);
-	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
-	  emit_insn (gen_movsd_store (mem, operands[1]));
-	}
-      else if (INT_REGNO_P (REGNO (operands[1])))
-	{
-	  rtx mem = operands[0];
-	  if (BYTES_BIG_ENDIAN)
-	    mem = adjust_address_nv (mem, mode, 4);
-	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
-	  emit_insn (gen_movsd_hardfloat (mem, operands[1]));
-	}
-      else
-	gcc_unreachable();
-      return;
-    }
-  if (reload_in_progress
-      && mode == SDmode
-      && REG_P (operands[0])
-      && MEM_P (operands[1])
-      && cfun->machine->sdmode_stack_slot != NULL_RTX
-      && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
-    {
-      if (FP_REGNO_P (REGNO (operands[0])))
-	{
-	  rtx mem = adjust_address_nv (operands[1], DDmode, 0);
-	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
-	  emit_insn (gen_movsd_load (operands[0], mem));
-	}
-      else if (INT_REGNO_P (REGNO (operands[0])))
-	{
-	  rtx mem = operands[1];
-	  if (BYTES_BIG_ENDIAN)
-	    mem = adjust_address_nv (mem, mode, 4);
-	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
-	  emit_insn (gen_movsd_hardfloat (operands[0], mem));
-	}
-      else
-	gcc_unreachable();
-      return;
-    }
-
   /* FIXME:  In the long term, this switch statement should go away
      and be replaced by a sequence of tests based on things like
      mode == Pmode.  */
@@ -11104,10 +10870,9 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
 
 	  /* If we are to limit the number of things we put in the TOC and
 	     this is a symbol plus a constant we can add in one insn,
-	     just put the symbol in the TOC and add the constant.  Don't do
-	     this if reload is in progress.  */
+	     just put the symbol in the TOC and add the constant.  */
 	  if (GET_CODE (operands[1]) == CONST
-	      && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
+	      && TARGET_NO_SUM_IN_TOC
 	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
 	      && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
 	      && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
@@ -11153,10 +10918,9 @@ rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
   /* Above, we may have called force_const_mem which may have returned
      an invalid address.  If we can, fix this up; otherwise, reload will
      have to deal with it.  */
-  if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
+  if (GET_CODE (operands[1]) == MEM)
     operands[1] = validize_mem (operands[1]);
 
- emit_set:
   emit_insn (gen_rtx_SET (operands[0], operands[1]));
 }
 
@@ -14692,6 +14456,58 @@ altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
 }
 
 static rtx
+altivec_expand_xl_be_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
+{
+  rtx pat, addr;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  tree arg1 = CALL_EXPR_ARG (exp, 1);
+  machine_mode tmode = insn_data[icode].operand[0].mode;
+  machine_mode mode0 = Pmode;
+  machine_mode mode1 = Pmode;
+  rtx op0 = expand_normal (arg0);
+  rtx op1 = expand_normal (arg1);
+
+  if (icode == CODE_FOR_nothing)
+    /* Builtin not supported on this processor.  */
+    return 0;
+
+  /* If we got invalid arguments bail out before generating bad rtl.  */
+  if (arg0 == error_mark_node || arg1 == error_mark_node)
+    return const0_rtx;
+
+  if (target == 0
+      || GET_MODE (target) != tmode
+      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+	  target = gen_reg_rtx (tmode);
+
+  op1 = copy_to_mode_reg (mode1, op1);
+
+  if (op0 == const0_rtx)
+    addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
+  else
+    {
+      op0 = copy_to_mode_reg (mode0, op0);
+      addr = gen_rtx_MEM (blk ? BLKmode : tmode,
+                          gen_rtx_PLUS (Pmode, op1, op0));
+    }
+
+  pat = GEN_FCN (icode) (target, addr);
+  if (!pat)
+    return 0;
+
+  emit_insn (pat);
+  /*  Reverse element order of elements if in LE mode */
+  if (!VECTOR_ELT_ORDER_BIG)
+    {
+      rtx sel = swap_selector_for_mode (tmode);
+      rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, target, target, sel),
+				  UNSPEC_VPERM);
+      emit_insn (gen_rtx_SET (target, vperm));
+    }
+  return target;
+}
+
+static rtx
 paired_expand_stv_builtin (enum insn_code icode, tree exp)
 {
   tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -16083,6 +15899,50 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
       /* Fall through.  */
     }
 
+  /* XL_BE  We initialized them to always load in big endian order.  */
+  switch (fcode)
+    {
+    case VSX_BUILTIN_XL_BE_V2DI:
+      {
+        enum insn_code code = CODE_FOR_vsx_load_v2di;
+        return altivec_expand_xl_be_builtin (code, exp, target, false);
+      }
+      break;
+    case VSX_BUILTIN_XL_BE_V4SI:
+      {
+        enum insn_code code = CODE_FOR_vsx_load_v4si;
+        return altivec_expand_xl_be_builtin (code, exp, target, false);
+      }
+      break;
+    case VSX_BUILTIN_XL_BE_V8HI:
+      {
+        enum insn_code code = CODE_FOR_vsx_load_v8hi;
+        return altivec_expand_xl_be_builtin (code, exp, target, false);
+      }
+      break;
+    case VSX_BUILTIN_XL_BE_V16QI:
+      {
+        enum insn_code code = CODE_FOR_vsx_load_v16qi;
+        return altivec_expand_xl_be_builtin (code, exp, target, false);
+       }
+      break;
+    case VSX_BUILTIN_XL_BE_V2DF:
+      {
+        enum insn_code code = CODE_FOR_vsx_load_v2df;
+        return altivec_expand_xl_be_builtin (code, exp, target, false);
+      }
+      break;
+    case VSX_BUILTIN_XL_BE_V4SF:
+      {
+        enum insn_code code = CODE_FOR_vsx_load_v4sf;
+        return altivec_expand_xl_be_builtin (code, exp, target, false);
+      }
+      break;
+    default:
+      break;
+      /* Fall through.  */
+    }
+
   *expandedp = false;
   return NULL_RTX;
 }
@@ -16198,51 +16058,51 @@ paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
 static void
 rs6000_invalid_builtin (enum rs6000_builtins fncode)
 {
-  size_t uns_fncode = (size_t)fncode;
+  size_t uns_fncode = (size_t) fncode;
   const char *name = rs6000_builtin_info[uns_fncode].name;
   HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
 
   gcc_assert (name != NULL);
   if ((fnmask & RS6000_BTM_CELL) != 0)
-    error ("Builtin function %s is only valid for the cell processor", name);
+    error ("builtin function %s is only valid for the cell processor", name);
   else if ((fnmask & RS6000_BTM_VSX) != 0)
-    error ("Builtin function %s requires the -mvsx option", name);
+    error ("builtin function %s requires the -mvsx option", name);
   else if ((fnmask & RS6000_BTM_HTM) != 0)
-    error ("Builtin function %s requires the -mhtm option", name);
+    error ("builtin function %s requires the -mhtm option", name);
   else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
-    error ("Builtin function %s requires the -maltivec option", name);
+    error ("builtin function %s requires the -maltivec option", name);
   else if ((fnmask & RS6000_BTM_PAIRED) != 0)
-    error ("Builtin function %s requires the -mpaired option", name);
+    error ("builtin function %s requires the -mpaired option", name);
   else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
 	   == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
-    error ("Builtin function %s requires the -mhard-dfp and"
+    error ("builtin function %s requires the -mhard-dfp and"
 	   " -mpower8-vector options", name);
   else if ((fnmask & RS6000_BTM_DFP) != 0)
-    error ("Builtin function %s requires the -mhard-dfp option", name);
+    error ("builtin function %s requires the -mhard-dfp option", name);
   else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
-    error ("Builtin function %s requires the -mpower8-vector option", name);
+    error ("builtin function %s requires the -mpower8-vector option", name);
   else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
 	   == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
-    error ("Builtin function %s requires the -mcpu=power9 and"
+    error ("builtin function %s requires the -mcpu=power9 and"
 	   " -m64 options", name);
   else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
-    error ("Builtin function %s requires the -mcpu=power9 option", name);
+    error ("builtin function %s requires the -mcpu=power9 option", name);
   else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
 	   == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
-    error ("Builtin function %s requires the -mcpu=power9 and"
+    error ("builtin function %s requires the -mcpu=power9 and"
 	   " -m64 options", name);
   else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
-    error ("Builtin function %s requires the -mcpu=power9 option", name);
+    error ("builtin function %s requires the -mcpu=power9 option", name);
   else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
 	   == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
-    error ("Builtin function %s requires the -mhard-float and"
+    error ("builtin function %s requires the -mhard-float and"
 	   " -mlong-double-128 options", name);
   else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
-    error ("Builtin function %s requires the -mhard-float option", name);
+    error ("builtin function %s requires the -mhard-float option", name);
   else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
-    error ("Builtin function %s requires the -mfloat128 option", name);
+    error ("builtin function %s requires the -mfloat128 option", name);
   else
-    error ("Builtin function %s is not supported with the current options",
+    error ("builtin function %s is not supported with the current options",
 	   name);
 }
 
@@ -16303,6 +16163,20 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
   tree arg0, arg1, lhs;
 
+  size_t uns_fncode = (size_t) fn_code;
+  enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
+  const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
+  const char *fn_name2 = (icode != CODE_FOR_nothing)
+			  ? get_insn_name ((int) icode)
+			  : "nothing";
+
+  if (TARGET_DEBUG_BUILTIN)
+      fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
+	       fn_code, fn_name1, fn_name2);
+
+  if (!rs6000_fold_gimple)
+    return false;
+
   /* Generic solution to prevent gimple folding of code without a LHS.  */
   if (!gimple_call_lhs (stmt))
     return false;
@@ -16662,6 +16536,9 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	return true;
       }
     default:
+	if (TARGET_DEBUG_BUILTIN)
+	   fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
+		    fn_code, fn_name1, fn_name2);
       break;
     }
 
@@ -16694,9 +16571,9 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     {
       enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
       const char *name1 = rs6000_builtin_info[uns_fcode].name;
-      const char *name2 = ((icode != CODE_FOR_nothing)
-			   ? get_insn_name ((int)icode)
-			   : "nothing");
+      const char *name2 = (icode != CODE_FOR_nothing)
+			   ? get_insn_name ((int) icode)
+			   : "nothing";
       const char *name3;
 
       switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
@@ -16715,7 +16592,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       fprintf (stderr,
 	       "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
 	       (name1) ? name1 : "---", fcode,
-	       (name2) ? name2 : "---", (int)icode,
+	       (name2) ? name2 : "---", (int) icode,
 	       name3,
 	       func_valid_p ? "" : ", not valid");
     }	     
@@ -17543,6 +17420,19 @@ altivec_init_builtins (void)
   def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
 	       VSX_BUILTIN_ST_ELEMREV_V4SI);
 
+  def_builtin ("__builtin_vsx_le_be_v8hi", v8hi_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V8HI);
+  def_builtin ("__builtin_vsx_le_be_v4si", v4si_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V4SI);
+  def_builtin ("__builtin_vsx_le_be_v2di", v2di_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V2DI);
+  def_builtin ("__builtin_vsx_le_be_v4sf", v4sf_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V4SF);
+  def_builtin ("__builtin_vsx_le_be_v2df", v2df_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V2DF);
+  def_builtin ("__builtin_vsx_le_be_v16qi", v16qi_ftype_long_pcvoid,
+		   VSX_BUILTIN_XL_BE_V16QI);
+
   if (TARGET_P9_VECTOR)
     {
       def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
@@ -17572,6 +17462,8 @@ altivec_init_builtins (void)
 	       VSX_BUILTIN_VEC_ST);
   def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
 	       VSX_BUILTIN_VEC_XL);
+  def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
+	       VSX_BUILTIN_VEC_XL_BE);
   def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
 	       VSX_BUILTIN_VEC_XST);
 
@@ -18065,8 +17957,10 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
       /* unsigned 2 argument functions.  */
     case ALTIVEC_BUILTIN_VMULEUB:
     case ALTIVEC_BUILTIN_VMULEUH:
+    case ALTIVEC_BUILTIN_VMULEUW:
     case ALTIVEC_BUILTIN_VMULOUB:
     case ALTIVEC_BUILTIN_VMULOUH:
+    case ALTIVEC_BUILTIN_VMULOUW:
     case CRYPTO_BUILTIN_VCIPHER:
     case CRYPTO_BUILTIN_VCIPHERLAST:
     case CRYPTO_BUILTIN_VNCIPHER:
@@ -19347,42 +19241,6 @@ mems_ok_for_quad_peep (rtx mem1, rtx mem2)
   return 1;
 }
 
-
-rtx
-rs6000_secondary_memory_needed_rtx (machine_mode mode)
-{
-  static bool eliminated = false;
-  rtx ret;
-
-  if (mode != SDmode || TARGET_NO_SDMODE_STACK)
-    ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
-  else
-    {
-      rtx mem = cfun->machine->sdmode_stack_slot;
-      gcc_assert (mem != NULL_RTX);
-
-      if (!eliminated)
-	{
-	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
-	  cfun->machine->sdmode_stack_slot = mem;
-	  eliminated = true;
-	}
-      ret = mem;
-    }
-
-  if (TARGET_DEBUG_ADDR)
-    {
-      fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
-	       GET_MODE_NAME (mode));
-      if (!ret)
-	fprintf (stderr, "\tNULL_RTX\n");
-      else
-	debug_rtx (ret);
-    }
-
-  return ret;
-}
-
 /* Return the mode to be used for memory when a secondary memory
    location is needed.  For SDmode values we need to use DDmode, in
    all other cases we can use the same mode.  */
@@ -19394,36 +19252,6 @@ rs6000_secondary_memory_needed_mode (machine_mode mode)
   return mode;
 }
 
-static tree
-rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
-{
-  /* Don't walk into types.  */
-  if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
-    {
-      *walk_subtrees = 0;
-      return NULL_TREE;
-    }
-
-  switch (TREE_CODE (*tp))
-    {
-    case VAR_DECL:
-    case PARM_DECL:
-    case FIELD_DECL:
-    case RESULT_DECL:
-    case SSA_NAME:
-    case REAL_CST:
-    case MEM_REF:
-    case VIEW_CONVERT_EXPR:
-      if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
-	return *tp;
-      break;
-    default:
-      break;
-    }
-
-  return NULL_TREE;
-}
-
 /* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
    on traditional floating point registers, and the VMRGOW/VMRGEW instructions
    only work on the traditional altivec registers, note if an altivec register
@@ -19444,7 +19272,7 @@ register_to_reg_type (rtx reg, bool *is_altivec)
   regno = REGNO (reg);
   if (regno >= FIRST_PSEUDO_REGISTER)
     {
-      if (!lra_in_progress && !reload_in_progress && !reload_completed)
+      if (!lra_in_progress && !reload_completed)
 	return PSEUDO_REG_TYPE;
 
       regno = true_regnum (reg);
@@ -19817,7 +19645,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
 	}
 
       /* ISA 2.07: MTVSRWZ or  MFVSRWZ.  */
-      if (TARGET_VSX_SMALL_INTEGER)
+      if (TARGET_P8_VECTOR)
 	{
 	  if (mode == SImode)
 	    return true;
@@ -20521,64 +20349,6 @@ rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
   return;
 }
 
-/* Allocate a 64-bit stack slot to be used for copying SDmode values through if
-   this function has any SDmode references.  If we are on a power7 or later, we
-   don't need the 64-bit stack slot since the LFIWZX and STIFWX instructions
-   can load/store the value.  */
-
-static void
-rs6000_alloc_sdmode_stack_slot (void)
-{
-  tree t;
-  basic_block bb;
-  gimple_stmt_iterator gsi;
-
-  gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
-  /* We use a different approach for dealing with the secondary
-     memory in LRA.  */
-  if (ira_use_lra_p)
-    return;
-
-  if (TARGET_NO_SDMODE_STACK)
-    return;
-
-  FOR_EACH_BB_FN (bb, cfun)
-    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-      {
-	tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
-	if (ret)
-	  {
-	    rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
-	    cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
-								  SDmode, 0);
-	    return;
-	  }
-      }
-
-  /* Check for any SDmode parameters of the function.  */
-  for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
-    {
-      if (TREE_TYPE (t) == error_mark_node)
-	continue;
-
-      if (TYPE_MODE (TREE_TYPE (t)) == SDmode
-	  || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
-	{
-	  rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
-	  cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
-								SDmode, 0);
-	  return;
-	}
-    }
-}
-
-static void
-rs6000_instantiate_decls (void)
-{
-  if (cfun->machine->sdmode_stack_slot != NULL_RTX)
-    instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
-}
-
 /* Given an rtx X being reloaded into a reg required to be
    in class CLASS, return the class of reg to actually use.
    In general this is just CLASS; but on some machines
@@ -20651,7 +20421,6 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
 	      /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
 		 a sign extend in the Altivec registers.  */
 	      if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
-		  && TARGET_VSX_SMALL_INTEGER
 		  && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
 		return ALTIVEC_REGS;
 	    }
@@ -23607,10 +23376,9 @@ static rtx
 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
 {
   rtx addr = XEXP (mem, 0);
-  int strict_p = (reload_in_progress || reload_completed);
 
-  if (!legitimate_indirect_address_p (addr, strict_p)
-      && !legitimate_indexed_address_p (addr, strict_p))
+  if (!legitimate_indirect_address_p (addr, reload_completed)
+      && !legitimate_indexed_address_p (addr, reload_completed))
     {
       addr = force_reg (Pmode, addr);
       mem = replace_equiv_address_nv (mem, addr);
@@ -24684,6 +24452,21 @@ rs6000_savres_strategy (rs6000_stack_t *info,
   else if (!lr_save_p && info->first_gp_reg_save > 29)
     strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
 
+  /* We can only use save multiple if we need to save all the registers from
+     first_gp_reg_save.  Otherwise, the CFI gets messed up (we save some
+     register we do not restore).  */
+  if (strategy & SAVE_MULTIPLE)
+    {
+      int i;
+
+      for (i = info->first_gp_reg_save; i < 32; i++)
+	if (fixed_reg_p (i) || !save_reg_p (i))
+	  {
+	    strategy &= ~SAVE_MULTIPLE;
+	    break;
+	  }
+    }
+
   /* We can only use load multiple or the out-of-line routines to
      restore gprs if we've saved all the registers from
      first_gp_reg_save.  Otherwise, we risk loading garbage.
@@ -32525,7 +32308,7 @@ rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
 	rtx fnmem, fn_reg, toc_reg;
 
 	if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
-	  error ("You cannot take the address of a nested function if you use "
+	  error ("you cannot take the address of a nested function if you use "
 		 "the -mno-pointers-to-nested-functions option.");
 
 	fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
@@ -33337,7 +33120,7 @@ rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
 {
   rtx base, offset;
 
-  if (reg == NULL && ! reload_in_progress && ! reload_completed)
+  if (reg == NULL && !reload_completed)
     reg = gen_reg_rtx (Pmode);
 
   if (GET_CODE (orig) == CONST)
@@ -33363,7 +33146,7 @@ rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
 	{
 	  if (SMALL_INT (offset))
 	    return plus_constant (Pmode, base, INTVAL (offset));
-	  else if (! reload_in_progress && ! reload_completed)
+	  else if (!reload_completed)
 	    offset = force_reg (Pmode, offset);
 	  else
 	    {
@@ -35999,14 +35782,6 @@ rs6000_libcall_value (machine_mode mode)
   return gen_rtx_REG (mode, regno);
 }
 
-
-/* Return true if we use LRA instead of reload pass.  */
-static bool
-rs6000_lra_p (void)
-{
-  return TARGET_LRA;
-}
-
 /* Compute register pressure classes.  We implement the target hook to avoid
    IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
    lead to incorrect estimates of number of available registers and therefor
@@ -36358,11 +36133,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "string",			OPTION_MASK_STRING,		false, true  },
   { "toc-fusion",		OPTION_MASK_TOC_FUSION,		false, true  },
   { "update",			OPTION_MASK_NO_UPDATE,		true , true  },
-  { "upper-regs-di",		OPTION_MASK_UPPER_REGS_DI,	false, true  },
-  { "upper-regs-df",		OPTION_MASK_UPPER_REGS_DF,	false, true  },
-  { "upper-regs-sf",		OPTION_MASK_UPPER_REGS_SF,	false, true  },
   { "vsx",			OPTION_MASK_VSX,		false, true  },
-  { "vsx-small-integer",	OPTION_MASK_VSX_SMALL_INTEGER,	false, true  },
   { "vsx-timode",		OPTION_MASK_VSX_TIMODE,		false, true  },
 #ifdef OPTION_MASK_64BIT
 #if TARGET_AIX_OS
@@ -37624,7 +37395,7 @@ rs6000_allocate_stack_temp (machine_mode mode,
 {
   rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
   rtx addr = XEXP (stack, 0);
-  int strict_p = (reload_in_progress || reload_completed);
+  int strict_p = reload_completed;
 
   if (!legitimate_indirect_address_p (addr, strict_p))
     {
@@ -37646,13 +37417,12 @@ rs6000_allocate_stack_temp (machine_mode mode,
 rtx
 rs6000_address_for_fpconvert (rtx x)
 {
-  int strict_p = (reload_in_progress || reload_completed);
   rtx addr;
 
   gcc_assert (MEM_P (x));
   addr = XEXP (x, 0);
-  if (! legitimate_indirect_address_p (addr, strict_p)
-      && ! legitimate_indexed_address_p (addr, strict_p))
+  if (! legitimate_indirect_address_p (addr, reload_completed)
+      && ! legitimate_indexed_address_p (addr, reload_completed))
     {
       if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
 	{
@@ -37690,10 +37460,9 @@ rs6000_address_for_altivec (rtx x)
   if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
     {
       rtx addr = XEXP (x, 0);
-      int strict_p = (reload_in_progress || reload_completed);
 
-      if (!legitimate_indexed_address_p (addr, strict_p)
-	  && !legitimate_indirect_address_p (addr, strict_p))
+      if (!legitimate_indexed_address_p (addr, reload_completed)
+	  && !legitimate_indirect_address_p (addr, reload_completed))
 	addr = copy_to_mode_reg (Pmode, addr);
 
       addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 9b73be1e176..82a0bda48c6 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -600,8 +600,7 @@ extern int rs6000_vector_align[];
 #define TARGET_DIRECT_MOVE_128	(TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \
 				 && TARGET_POWERPC64)
 #define TARGET_VEXTRACTUB	(TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \
-				 && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
-
+				 && TARGET_POWERPC64)
 
 /* Whether we should avoid (SUBREG:SI (REG:SF) and (SUBREG:SF (REG:SI).  */
 #define TARGET_NO_SF_SUBREG	TARGET_DIRECT_MOVE_64BIT
@@ -761,7 +760,6 @@ extern int rs6000_vector_align[];
 #define TARGET_DIRECT_MOVE_64BIT	(TARGET_DIRECT_MOVE		\
 					 && TARGET_P8_VECTOR		\
 					 && TARGET_POWERPC64		\
-					 && TARGET_UPPER_REGS_DI	\
 					 && (rs6000_altivec_element_order != 2))
 
 /* Whether the various reciprocal divide/square root estimate instructions
@@ -1585,13 +1583,6 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
 #define SECONDARY_MEMORY_NEEDED(CLASS1,CLASS2,MODE)			\
   rs6000_secondary_memory_needed_ptr (CLASS1, CLASS2, MODE)
 
-/* For cpus that cannot load/store SDmode values from the 64-bit
-   FP registers without using a full 64-bit load/store, we need
-   to allocate a full 64-bit stack slot for them.  */
-
-#define SECONDARY_MEMORY_NEEDED_RTX(MODE) \
-  rs6000_secondary_memory_needed_rtx (MODE)
-
 /* Specify the mode to be used for memory when a secondary memory
    location is needed.  For cpus that cannot load/store SDmode values
    from the 64-bit FP registers without using a full 64-bit
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2fd9ef0f168..6985b9f82da 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -691,7 +691,7 @@
 ;;	D-form load to FPR register & move to Altivec register
 ;;	Move Altivec register to FPR register and store
 (define_mode_iterator ALTIVEC_DFORM [DF
-				     SF
+				     (SF "TARGET_P8_VECTOR")
 				     (DI "TARGET_POWERPC64")])
 
 
@@ -1004,8 +1004,7 @@
 (define_split
   [(set (match_operand:DI 0 "altivec_register_operand")
 	(sign_extend:DI (match_operand:SI 1 "altivec_register_operand")))]
-  "TARGET_VSX_SMALL_INTEGER && TARGET_P8_VECTOR && !TARGET_P9_VECTOR
-   && reload_completed"
+  "TARGET_P8_VECTOR && !TARGET_P9_VECTOR && reload_completed"
   [(const_int 0)]
 {
   rtx dest = operands[0];
@@ -5161,7 +5160,7 @@
   operands[1] = rs6000_address_for_fpconvert (operands[1]);
   if (GET_CODE (operands[2]) == SCRATCH)
     operands[2] = gen_reg_rtx (DImode);
-  if (TARGET_VSX_SMALL_INTEGER)
+  if (TARGET_P8_VECTOR)
     emit_insn (gen_extendsidi2 (operands[2], operands[1]));
   else
     emit_insn (gen_lfiwax (operands[2], operands[1]));
@@ -5238,7 +5237,7 @@
   operands[1] = rs6000_address_for_fpconvert (operands[1]);
   if (GET_CODE (operands[2]) == SCRATCH)
     operands[2] = gen_reg_rtx (DImode);
-  if (TARGET_VSX_SMALL_INTEGER)
+  if (TARGET_P8_VECTOR)
     emit_insn (gen_zero_extendsidi2 (operands[2], operands[1]));
   else
     emit_insn (gen_lfiwzx (operands[2], operands[1]));
@@ -5423,8 +5422,7 @@
 	      (clobber (match_scratch:DI 2))
 	      (clobber (match_scratch:DI 3))
 	      (clobber (match_scratch:<QHI:MODE> 4))])]
-  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64
-   && TARGET_VSX_SMALL_INTEGER"
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64"
 {
   if (MEM_P (operands[1]))
     operands[1] = rs6000_address_for_fpconvert (operands[1]);
@@ -5437,8 +5435,7 @@
    (clobber (match_scratch:DI 2 "=wK,wi,wK"))
    (clobber (match_scratch:DI 3 "=X,r,X"))
    (clobber (match_scratch:<QHI:MODE> 4 "=X,X,wK"))]
-  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64
-   && TARGET_UPPER_REGS_DI && TARGET_VSX_SMALL_INTEGER"
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -5477,8 +5474,7 @@
 		    (match_operand:QHI 1 "input_operand" "")))
 	      (clobber (match_scratch:DI 2 ""))
 	      (clobber (match_scratch:DI 3 ""))])]
-  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64
-   && TARGET_VSX_SMALL_INTEGER"
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64"
 {
   if (MEM_P (operands[1]))
     operands[1] = rs6000_address_for_fpconvert (operands[1]);
@@ -5490,8 +5486,7 @@
 	 (match_operand:QHI 1 "reg_or_indexed_operand" "wK,r,Z")))
    (clobber (match_scratch:DI 2 "=wK,wi,wJwK"))
    (clobber (match_scratch:DI 3 "=X,r,X"))]
-  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64
-   && TARGET_VSX_SMALL_INTEGER"
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -5524,7 +5519,7 @@
   "TARGET_HARD_FLOAT && <TARGET_FLOAT>"
   "
 {
-  if (!TARGET_VSX_SMALL_INTEGER)
+  if (!TARGET_P8_VECTOR)
     {
       rtx src = force_reg (<MODE>mode, operands[1]);
 
@@ -5551,7 +5546,7 @@
   "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
    && (<MODE>mode != SFmode || TARGET_SINGLE_FLOAT)
    && TARGET_STFIWX && can_create_pseudo_p ()
-   && !TARGET_VSX_SMALL_INTEGER"
+   && !TARGET_P8_VECTOR"
   "#"
   ""
   [(pc)]
@@ -5592,7 +5587,7 @@
 	(fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d,<rreg>")))
    (clobber (match_operand:DI 2 "gpc_reg_operand" "=1,d"))
    (clobber (match_operand:DI 3 "offsettable_mem_operand" "=o,o"))]
-  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && !TARGET_VSX_SMALL_INTEGER"
+  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && !TARGET_P8_VECTOR"
   "#"
   ""
   [(pc)]
@@ -5629,8 +5624,7 @@
   [(parallel [(set (match_operand:<QHI:MODE> 0 "nonimmediate_operand")
 		   (fix:QHI (match_operand:SFDF 1 "gpc_reg_operand")))
 	      (clobber (match_scratch:DI 2))])]
-  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT
-   && TARGET_VSX_SMALL_INTEGER"
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT"
 {
   if (MEM_P (operands[0]))
     operands[0] = rs6000_address_for_fpconvert (operands[0]);
@@ -5641,8 +5635,7 @@
 	(fix:QHI
 	 (match_operand:SFDF 1 "gpc_reg_operand" "<SFDF:Fv>,<SFDF:Fv>")))
    (clobber (match_scratch:DI 2 "=X,wi"))]
-  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT
-   && TARGET_VSX_SMALL_INTEGER"
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -5672,7 +5665,7 @@
   "TARGET_HARD_FLOAT && <TARGET_FLOAT> && TARGET_FCTIWUZ && TARGET_STFIWX"
   "
 {
-  if (!TARGET_VSX_SMALL_INTEGER)
+  if (!TARGET_P8_VECTOR)
     {
       emit_insn (gen_fixuns_trunc<mode>si2_stfiwx (operands[0], operands[1]));
       DONE;
@@ -5685,7 +5678,7 @@
    (clobber (match_scratch:DI 2 "=d"))]
   "TARGET_HARD_FLOAT && <TARGET_FLOAT> && TARGET_FCTIWUZ
    && TARGET_STFIWX && can_create_pseudo_p ()
-   && !TARGET_VSX_SMALL_INTEGER"
+   && !TARGET_P8_VECTOR"
   "#"
   ""
   [(pc)]
@@ -5734,8 +5727,7 @@
   [(parallel [(set (match_operand:<QHI:MODE> 0 "nonimmediate_operand")
 		   (unsigned_fix:QHI (match_operand:SFDF 1 "gpc_reg_operand")))
 	      (clobber (match_scratch:DI 2))])]
-  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT
-   && TARGET_VSX_SMALL_INTEGER"
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT"
 {
   if (MEM_P (operands[0]))
     operands[0] = rs6000_address_for_fpconvert (operands[0]);
@@ -5746,8 +5738,7 @@
 	(unsigned_fix:QHI
 	 (match_operand:SFDF 1 "gpc_reg_operand" "<SFDF:Fv>,<SFDF:Fv>")))
    (clobber (match_scratch:DI 2 "=X,wi"))]
-  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT
-   && TARGET_VSX_SMALL_INTEGER"
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE_64BIT"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -5777,7 +5768,7 @@
 (define_insn "*fctiw<u>z_<mode>_smallint"
   [(set (match_operand:SI 0 "vsx_register_operand" "=d,wi")
 	(any_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
-  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_VSX_SMALL_INTEGER"
+  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_P8_VECTOR"
   "@
    fctiw<u>z %0,%1
    xscvdp<su>xws %x0,%x1"
@@ -5789,7 +5780,7 @@
   [(set (match_operand:SI 0 "memory_operand" "=Z")
 	(any_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "wa")))
    (clobber (match_scratch:SI 2 "=wa"))]
-  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_VSX_SMALL_INTEGER"
+  "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_P8_VECTOR"
   "#"
   "&& reload_completed"
   [(set (match_dup 2)
@@ -6681,7 +6672,7 @@
 		   UNSPEC_MOVSI_GOT))]
   "DEFAULT_ABI == ABI_V4
     && flag_pic == 1
-    && (reload_in_progress || reload_completed)"
+    && reload_completed"
   [(set (match_dup 0) (match_dup 2))
    (set (match_dup 0) (unspec:SI [(match_dup 1)(match_dup 0)]
 				 UNSPEC_MOVSI_GOT))]
@@ -6959,7 +6950,7 @@
 (define_split
   [(set (match_operand:DI 0 "altivec_register_operand")
 	(match_operand:DI 1 "xxspltib_constant_split"))]
-  "TARGET_VSX_SMALL_INTEGER && TARGET_P9_VECTOR && reload_completed"
+  "TARGET_P9_VECTOR && reload_completed"
   [(const_int 0)]
 {
   rtx op0 = operands[0];
@@ -8234,7 +8225,7 @@
 	(and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r")
 		       (match_operand:P 2 "reg_or_cint_operand" "rI"))
 	       (const_int -16)))]
-  "TARGET_ALTIVEC && (reload_in_progress || reload_completed)"
+  "TARGET_ALTIVEC && reload_completed"
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
@@ -8664,7 +8655,7 @@
 (define_split
   [(set (match_operand:DI 0 "altivec_register_operand" "")
 	(match_operand:DI 1 "s5bit_cint_operand" ""))]
-  "TARGET_UPPER_REGS_DI && TARGET_VSX && reload_completed"
+  "TARGET_VSX && reload_completed"
   [(const_int 0)]
 {
   rtx op0 = operands[0];
@@ -8686,7 +8677,7 @@
 (define_split
   [(set (match_operand:INT_ISA3 0 "altivec_register_operand" "")
 	(match_operand:INT_ISA3 1 "xxspltib_constant_split" ""))]
-  "TARGET_UPPER_REGS_DI && TARGET_P9_VECTOR && reload_completed"
+  "TARGET_P9_VECTOR && reload_completed"
   [(const_int 0)]
 {
   rtx op0 = operands[0];
@@ -9766,7 +9757,7 @@
 	(match_operand:DF 1 "any_operand" ""))
    (set (match_operand:DF 2 "gpc_reg_operand" "")
 	(match_dup 0))]
-  "!TARGET_UPPER_REGS_DF
+  "!TARGET_VSX
    && peep2_reg_dead_p (2, operands[0])"
   [(set (match_dup 2) (match_dup 1))])
 
@@ -9775,7 +9766,7 @@
 	(match_operand:SF 1 "any_operand" ""))
    (set (match_operand:SF 2 "gpc_reg_operand" "")
 	(match_dup 0))]
-  "!TARGET_UPPER_REGS_SF
+  "!TARGET_P8_VECTOR
    && peep2_reg_dead_p (2, operands[0])"
   [(set (match_dup 2) (match_dup 1))])
 
@@ -13974,8 +13965,7 @@
 	(match_operand:ALTIVEC_DFORM 2 "simple_offsettable_mem_operand"))
    (set (match_operand:ALTIVEC_DFORM 3 "altivec_register_operand")
 	(match_dup 1))]
-  "TARGET_VSX && TARGET_UPPER_REGS_<MODE> && !TARGET_P9_DFORM_SCALAR
-   && peep2_reg_dead_p (2, operands[1])"
+  "TARGET_VSX && !TARGET_P9_DFORM_SCALAR && peep2_reg_dead_p (2, operands[1])"
   [(set (match_dup 0)
 	(match_dup 4))
    (set (match_dup 3)
@@ -14011,8 +14001,7 @@
 	(match_operand:ALTIVEC_DFORM 2 "altivec_register_operand"))
    (set (match_operand:ALTIVEC_DFORM 3 "simple_offsettable_mem_operand")
 	(match_dup 1))]
-  "TARGET_VSX && TARGET_UPPER_REGS_<MODE> && !TARGET_P9_DFORM_SCALAR
-   && peep2_reg_dead_p (2, operands[1])"
+  "TARGET_VSX && !TARGET_P9_DFORM_SCALAR && peep2_reg_dead_p (2, operands[1])"
   [(set (match_dup 0)
 	(match_dup 4))
    (set (match_dup 5)
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 28d899391fb..1ee84cb4dc5 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -148,6 +148,10 @@ maltivec=be
 Target Report RejectNegative Var(rs6000_altivec_element_order, 2)
 Generate AltiVec instructions using big-endian element order.
 
+mfold-gimple
+Target Report Var(rs6000_fold_gimple) Init(1)
+Enable early gimple folding of builtins.
+
 mhard-dfp
 Target Report Mask(DFP) Var(rs6000_isa_flags)
 Use decimal floating point instructions.
@@ -200,9 +204,6 @@ mvsx-scalar-double
 Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(1)
 ; If -mvsx, use VSX arithmetic instructions for DFmode (on by default)
 
-mvsx-scalar-memory
-Target Undocumented Report Alias(mupper-regs-df)
-
 mvsx-align-128
 Target Undocumented Report Var(TARGET_VSX_ALIGN_128) Save
 ; If -mvsx, set alignment to 128 bits instead of 32/64
@@ -433,9 +434,9 @@ mlong-double-
 Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save
 -mlong-double-<n>	Specify size of long double (64 or 128 bits).
 
+; This option existed in the past, but now is always on.
 mlra
-Target Report Mask(LRA) Var(rs6000_isa_flags)
-Enable Local Register Allocation.
+Target RejectNegative Undocumented Ignore
 
 msched-costly-dep=
 Target RejectNegative Joined Var(rs6000_sched_costly_dep_str)
@@ -549,22 +550,6 @@ mcompat-align-parm
 Target Report Var(rs6000_compat_align_parm) Init(0) Save
 Generate aggregate parameter passing code with at most 64-bit alignment.
 
-mupper-regs-df
-Target Report Mask(UPPER_REGS_DF) Var(rs6000_isa_flags)
-Allow double variables in upper registers with -mcpu=power7 or -mvsx.
-
-mupper-regs-sf
-Target Report Mask(UPPER_REGS_SF) Var(rs6000_isa_flags)
-Allow float variables in upper registers with -mcpu=power8 or -mpower8-vector.
-
-mupper-regs
-Target Report Var(TARGET_UPPER_REGS) Init(-1) Save
-Allow float/double variables in upper registers if cpu allows it.
-
-mupper-regs-di
-Target Report Mask(UPPER_REGS_DI) Var(rs6000_isa_flags)
-Allow 64-bit integer variables in upper registers with -mcpu=power7 or -mvsx.
-
 moptimize-swaps
 Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
 Analyze and remove doubleword swaps from VSX computations.
@@ -625,10 +610,6 @@ mfloat128-convert
 Target Undocumented Mask(FLOAT128_CVT) Var(rs6000_isa_flags)
 Enable default conversions between __float128 & long double.
 
-mvsx-small-integer
-Target Report Mask(VSX_SMALL_INTEGER) Var(rs6000_isa_flags)
-Enable small integers to be in VSX registers.
-
 mstack-protector-guard=
 Target RejectNegative Joined Enum(stack_protector_guard) Var(rs6000_stack_protector_guard) Init(SSP_TLS)
 Use given stack-protector guard.
diff --git a/gcc/config/rs6000/rtems.h b/gcc/config/rs6000/rtems.h
index 54a36de6eb4..8a62fdcbaf3 100644
--- a/gcc/config/rs6000/rtems.h
+++ b/gcc/config/rs6000/rtems.h
@@ -14,33 +14,172 @@
    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
    License for more details.
 
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
-/* Specify predefined symbols in preprocessor.  */
+/* Copy and paste from linux64.h and freebsd64.h */
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __powerpc64__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+
+/* Copy and paste from linux64.h and freebsd64.h */
+#undef	TARGET_AIX
+#define	TARGET_AIX TARGET_64BIT
 
 #undef TARGET_OS_CPP_BUILTINS
-#define TARGET_OS_CPP_BUILTINS()          \
-  do                                      \
-    {                                     \
-      builtin_define_std ("PPC");         \
-      builtin_define ("__rtems__");       \
-      builtin_define ("__USE_INIT_FINI__"); \
-      builtin_assert ("system=rtems");    \
-      builtin_assert ("cpu=powerpc");     \
-      builtin_assert ("machine=powerpc"); \
-      TARGET_OS_SYSV_CPP_BUILTINS ();     \
-    }                                     \
+#define TARGET_OS_CPP_BUILTINS()			\
+  do							\
+    {							\
+      builtin_define ("__rtems__");			\
+      builtin_define ("__USE_INIT_FINI__");		\
+      builtin_assert ("system=rtems");			\
+      if (TARGET_64BIT)					\
+	{						\
+	  builtin_define ("__PPC__");			\
+	  builtin_define ("__PPC64__");			\
+	  builtin_define ("__powerpc64__");		\
+	  builtin_assert ("cpu=powerpc64");		\
+	  builtin_assert ("machine=powerpc64");		\
+	}						\
+      else						\
+	{						\
+	  builtin_define_std ("PPC");			\
+	  builtin_define_std ("powerpc");		\
+	  builtin_assert ("cpu=powerpc");		\
+	  builtin_assert ("machine=powerpc");		\
+	  TARGET_OS_SYSV_CPP_BUILTINS ();		\
+	}						\
+    }							\
+  while (0)
+
+/* Copy and paste from linux64.h and freebsd64.h */
+#define INVALID_64BIT "-m%s not supported in this configuration"
+
+/* A lot of copy and paste from linux64.h and freebsd64.h */
+#undef	SUBSUBTARGET_OVERRIDE_OPTIONS
+#define	SUBSUBTARGET_OVERRIDE_OPTIONS				\
+  do								\
+    {								\
+      if (rs6000_isa_flags & OPTION_MASK_64BIT)			\
+	{							\
+	  rs6000_elf_abi = 2;					\
+	  rs6000_current_abi = ABI_ELFv2;			\
+	  if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)	\
+	    {							\
+	      rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;	\
+	      error (INVALID_64BIT, "relocatable");		\
+	    }							\
+	  if (rs6000_isa_flags & OPTION_MASK_EABI)		\
+	    {							\
+	      rs6000_isa_flags &= ~OPTION_MASK_EABI;		\
+	      error (INVALID_64BIT, "eabi");			\
+	    }							\
+	  if (TARGET_PROTOTYPE)					\
+	    {							\
+	      target_prototype = 0;				\
+	      error (INVALID_64BIT, "prototype");		\
+	    }							\
+	  if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)	\
+	    {							\
+	      rs6000_isa_flags |= OPTION_MASK_POWERPC64;	\
+	      error ("-m64 requires a PowerPC64 cpu");		\
+	    }							\
+	}							\
+    }								\
   while (0)
 
 #undef TARGET_LIBGCC_SDATA_SECTION
 #define TARGET_LIBGCC_SDATA_SECTION ".sdata"
 
-#undef CPP_OS_DEFAULT_SPEC
-#define CPP_OS_DEFAULT_SPEC "%(cpp_os_rtems)"
+/* Copy and paste from linux64.h and freebsd64.h */
+#undef	SIZE_TYPE
+#define	SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int")
+
+/* Copy and paste from linux64.h and freebsd64.h */
+#undef	PTRDIFF_TYPE
+#define	PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+/* Copy and paste from freebsd64.h */
+#undef WCHAR_TYPE
+
+/* Copy and paste from freebsd64.h */
+#undef  WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* Copy and paste from linux64.h and freebsd64.h */
+#ifdef __powerpc64__
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC)	\
+  asm (SECTION_OP "\n"					\
+"	bl " #FUNC "\n"					\
+"	nop\n"						\
+"	.previous");
+#endif
+
+/* This could be also POWERPC_FREEBSD.  It is related to the save/restore
+   defines below.  */
+#define POWERPC_LINUX
+
+/* Copy and paste from linux64.h and freebsd64.h */
+#undef  SAVE_FP_PREFIX
+#define SAVE_FP_PREFIX (TARGET_64BIT ? "._savef" : "_savefpr_")
+#undef  SAVE_FP_SUFFIX
+#define SAVE_FP_SUFFIX ""
+#undef  RESTORE_FP_PREFIX
+#define RESTORE_FP_PREFIX (TARGET_64BIT ? "._restf" : "_restfpr_")
+#undef  RESTORE_FP_SUFFIX
+#define RESTORE_FP_SUFFIX ""
 
-#define CPP_OS_RTEMS_SPEC "\
+/* Copy and paste from linux64.h and freebsd64.h */
+#undef	ASM_DECLARE_FUNCTION_SIZE
+#define	ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL)			\
+  do									\
+    {									\
+      if (!flag_inhibit_size_directive)					\
+	{								\
+	  fputs ("\t.size\t", (FILE));					\
+	  if (TARGET_64BIT && DOT_SYMBOLS)				\
+	    putc ('.', (FILE));						\
+	  assemble_name ((FILE), (FNAME));				\
+	  fputs (",.-", (FILE));					\
+	  rs6000_output_function_entry (FILE, FNAME);			\
+	  putc ('\n', (FILE));						\
+	}								\
+    }									\
+  while (0)
+
+/* Copy and paste from linux64.h and freebsd64.h */
+#undef  ASM_OUTPUT_SPECIAL_POOL_ENTRY_P
+#define ASM_OUTPUT_SPECIAL_POOL_ENTRY_P(X, MODE)			\
+  (TARGET_TOC								\
+   && (GET_CODE (X) == SYMBOL_REF					\
+       || (GET_CODE (X) == CONST && GET_CODE (XEXP (X, 0)) == PLUS	\
+	   && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF)		\
+       || GET_CODE (X) == LABEL_REF					\
+       || (GET_CODE (X) == CONST_INT 					\
+	   && GET_MODE_BITSIZE (MODE) <= GET_MODE_BITSIZE (Pmode))	\
+       || (GET_CODE (X) == CONST_DOUBLE					\
+	   && ((TARGET_64BIT						\
+		&& (TARGET_MINIMAL_TOC					\
+		    || (SCALAR_FLOAT_MODE_P (GET_MODE (X))		\
+			&& ! TARGET_NO_FP_IN_TOC)))			\
+	       || (!TARGET_64BIT					\
+		   && !TARGET_NO_FP_IN_TOC				\
+		   && SCALAR_FLOAT_MODE_P (GET_MODE (X))		\
+		   && BITS_PER_WORD == HOST_BITS_PER_INT)))))
+
+#undef CPP_OS_DEFAULT_SPEC
+#define CPP_OS_DEFAULT_SPEC "\
 %{!mcpu*:  %{!Dppc*: %{!Dmpc*: -Dmpc750} } }\
 %{mcpu=403:  %{!Dppc*: %{!Dmpc*: -Dppc403}  } } \
 %{mcpu=505:  %{!Dppc*: %{!Dmpc*: -Dmpc505}  } } \
@@ -55,6 +194,37 @@
 %{mcpu=8540: %{!Dppc*: %{!Dmpc*: -Dppc8540}  } } \
 %{mcpu=e6500: -D__PPC_CPU_E6500__}"
 
+#undef	ASM_DEFAULT_SPEC
+#define	ASM_DEFAULT_SPEC "-mppc%{m64:64}"
+
+#undef	ASM_SPEC
+#define	ASM_SPEC "%{!m64:%(asm_spec32)}%{m64:%(asm_spec64)} %(asm_spec_common)"
+
+#define ASM_SPEC32 "-a32 \
+%{mrelocatable} %{mrelocatable-lib} %{" FPIE_OR_FPIC_SPEC ":-K PIC} \
+%{memb|msdata=eabi: -memb}"
+
+#define ASM_SPEC64 "-a64"
+
+#define ASM_SPEC_COMMON "%(asm_cpu) \
+%{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}}" \
+  ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
+
+#undef  LINK_OS_DEFAULT_SPEC
+#define LINK_OS_DEFAULT_SPEC \
+"%{!m64:%(link_os_spec32)}%{m64:%(link_os_spec64)}"
+
+#define LINK_OS_SPEC32 ENDIAN_SELECT(" -m elf32ppc",		\
+				     " -m elf32lppc",		\
+				     " -m elf32ppc")
+#define LINK_OS_SPEC64 ENDIAN_SELECT(" -m elf64ppc",		\
+				     " -m elf64lppc",		\
+				     " -m elf64ppc")
+
 #undef  SUBSUBTARGET_EXTRA_SPECS
 #define SUBSUBTARGET_EXTRA_SPECS \
-  { "cpp_os_rtems",		CPP_OS_RTEMS_SPEC }
+  { "asm_spec_common",		ASM_SPEC_COMMON },			\
+  { "asm_spec32",		ASM_SPEC32 },				\
+  { "asm_spec64",		ASM_SPEC64 },				\
+  { "link_os_spec32",		LINK_OS_SPEC32 },			\
+  { "link_os_spec64",		LINK_OS_SPEC64 },
diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h
index de386291a51..cbee89140dd 100644
--- a/gcc/config/rs6000/sysv4.h
+++ b/gcc/config/rs6000/sysv4.h
@@ -757,24 +757,34 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
 #define CRTOFFLOADEND ""
 #endif
 
-#ifdef HAVE_LD_PIE
-#define	STARTFILE_LINUX_SPEC "\
-%{!shared: %{pg|p|profile:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}} \
-%{mnewlib:ecrti.o%s;:crti.o%s} \
-%{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \
-" CRTOFFLOADBEGIN
-#else
-#define	STARTFILE_LINUX_SPEC "\
-%{!shared: %{pg|p|profile:gcrt1.o%s;:crt1.o%s}} \
-%{mnewlib:ecrti.o%s;:crti.o%s} \
-%{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s} \
-" CRTOFFLOADBEGIN
-#endif
-
-#define	ENDFILE_LINUX_SPEC "\
-%{shared|pie:crtendS.o%s;:crtend.o%s} \
-%{mnewlib:ecrtn.o%s;:crtn.o%s} \
-" CRTOFFLOADEND
+/* STARTFILE_LINUX_SPEC should be the same as GNU_USER_TARGET_STARTFILE_SPEC
+   but with the mnewlib ecrti.o%s selection substituted for crti.o%s.  */
+#define	STARTFILE_LINUX_SPEC \
+  "%{shared:; \
+     pg|p|profile:gcrt1.o%s; \
+     static:crt1.o%s; \
+     " PIE_SPEC ":Scrt1.o%s; \
+     :crt1.o%s} \
+   %{mnewlib:ecrti.o%s;:crti.o%s} \
+   %{static:crtbeginT.o%s; \
+     shared|" PIE_SPEC ":crtbeginS.o%s; \
+     :crtbegin.o%s} \
+   %{fvtable-verify=none:%s; \
+     fvtable-verify=preinit:vtv_start_preinit.o%s; \
+     fvtable-verify=std:vtv_start.o%s} \
+   " CRTOFFLOADBEGIN
+
+/* ENDFILE_LINUX_SPEC should be the same as GNU_USER_TARGET_ENDFILE_SPEC
+   but with the mnewlib ecrtn.o%s selection substituted for crtn.o%s.  */
+#define ENDFILE_LINUX_SPEC \
+  "%{fvtable-verify=none:%s; \
+     fvtable-verify=preinit:vtv_end_preinit.o%s; \
+     fvtable-verify=std:vtv_end.o%s} \
+   %{static:crtend.o%s; \
+     shared|" PIE_SPEC ":crtendS.o%s; \
+     :crtend.o%s} \
+   %{mnewlib:ecrtn.o%s;:crtn.o%s} \
+   " CRTOFFLOADEND
 
 #define LINK_START_LINUX_SPEC ""
 
diff --git a/gcc/config/rs6000/t-rtems b/gcc/config/rs6000/t-rtems
index 8290f5c5bdd..0e39c6320c8 100644
--- a/gcc/config/rs6000/t-rtems
+++ b/gcc/config/rs6000/t-rtems
@@ -27,8 +27,8 @@ MULTILIB_REQUIRED =
 MULTILIB_OPTIONS += mcpu=403/mcpu=505/mcpu=603e/mcpu=604/mcpu=860/mcpu=7400/mcpu=8540/mcpu=e6500
 MULTILIB_DIRNAMES += m403 m505 m603e m604 m860 m7400 m8540 me6500
 
-MULTILIB_OPTIONS += m32
-MULTILIB_DIRNAMES += m32
+MULTILIB_OPTIONS += m32/m64
+MULTILIB_DIRNAMES += m32 m64
 
 MULTILIB_OPTIONS += msoft-float
 MULTILIB_DIRNAMES += nof
@@ -72,3 +72,5 @@ MULTILIB_REQUIRED += mcpu=8540/msoft-float
 MULTILIB_REQUIRED += mcpu=860
 MULTILIB_REQUIRED += mcpu=e6500/m32
 MULTILIB_REQUIRED += mcpu=e6500/m32/msoft-float/mno-altivec
+MULTILIB_REQUIRED += mcpu=e6500/m64
+MULTILIB_REQUIRED += mcpu=e6500/m64/msoft-float/mno-altivec
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index a3d53e7f439..d6f2fd13fcb 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -74,6 +74,16 @@
 			    (V1TI  "TI")
 			    (TI    "TI")])
 
+;; As above, but in lower case
+(define_mode_attr VEC_base_l [(V16QI "qi")
+			      (V8HI  "hi")
+			      (V4SI  "si")
+			      (V2DI  "di")
+			      (V4SF  "sf")
+			      (V2DF  "df")
+			      (V1TI  "ti")
+			      (TI    "ti")])
+
 ;; Same size integer type for floating point data
 (define_mode_attr VEC_int [(V4SF  "v4si")
 			   (V2DF  "v2di")])
@@ -1016,7 +1026,7 @@
 
 
 ;; Vector initialization, set, extract
-(define_expand "vec_init<mode>"
+(define_expand "vec_init<mode><VEC_base_l>"
   [(match_operand:VEC_E 0 "vlogical_operand" "")
    (match_operand:VEC_E 1 "" "")]
   "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
@@ -1035,7 +1045,7 @@
   DONE;
 })
 
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><VEC_base_l>"
   [(match_operand:<VEC_base> 0 "register_operand" "")
    (match_operand:VEC_E 1 "vlogical_operand" "")
    (match_operand 2 "const_int_operand" "")]
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index ff65caa35dc..510294d97eb 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -37,6 +37,9 @@
 				  (TI	"TARGET_VSX_TIMODE")
 				  V1TI])
 
+;; Iterator for 128-bit integer types that go in a single vector register.
+(define_mode_iterator VSX_TI [(TI "TARGET_VSX_TIMODE") V1TI])
+
 ;; Iterator for the 2 32-bit vector types
 (define_mode_iterator VSX_W [V4SF V4SI])
 
@@ -326,6 +329,7 @@
    UNSPEC_VSX_CVDPSXWS
    UNSPEC_VSX_CVDPUXWS
    UNSPEC_VSX_CVSPDP
+   UNSPEC_VSX_CVHPSP
    UNSPEC_VSX_CVSPDPN
    UNSPEC_VSX_CVDPSPN
    UNSPEC_VSX_CVSXWDP
@@ -348,6 +352,8 @@
    UNSPEC_VSX_ROUND_I
    UNSPEC_VSX_ROUND_IC
    UNSPEC_VSX_SLDWI
+   UNSPEC_VSX_XXPERM
+
    UNSPEC_VSX_XXSPLTW
    UNSPEC_VSX_XXSPLTD
    UNSPEC_VSX_DIVSD
@@ -368,6 +374,8 @@
    UNSPEC_VSX_SIEXPQP
    UNSPEC_VSX_SCMPEXPDP
    UNSPEC_VSX_STSTDC
+   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
+   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
    UNSPEC_VSX_VXEXP
    UNSPEC_VSX_VXSIG
    UNSPEC_VSX_VIEXP
@@ -751,9 +759,9 @@
 ;; special V1TI container class, which it is not appropriate to use vec_select
 ;; for the type.
 (define_insn "*vsx_le_permute_<mode>"
-  [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
-	(rotate:VSX_LE_128
-	 (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
+  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
+	(rotate:VSX_TI
+	 (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>")
 	 (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "@
@@ -764,10 +772,10 @@
    (set_attr "type" "vecperm,vecload,vecstore")])
 
 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
-  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
-	(rotate:VSX_LE_128
-	 (rotate:VSX_LE_128
-	  (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
+  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
+	(rotate:VSX_TI
+	 (rotate:VSX_TI
+	  (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
 	  (const_int 64))
 	 (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -792,16 +800,15 @@
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
-  [(set (match_dup 2)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 0)
-	(rotate:VSX_LE_128 (match_dup 2)
-			   (const_int 64)))]
+  [(const_int 0)]
   "
 {
-  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
-                                       : operands[0];
+  rtx tmp = (can_create_pseudo_p ()
+	     ? gen_reg_rtx_and_attrs (operands[0])
+	     : operands[0]);
+  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+  DONE;
 }
   "
   [(set_attr "type" "vecload")
@@ -819,15 +826,14 @@
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
-  [(set (match_dup 2)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 0)
-	(rotate:VSX_LE_128 (match_dup 2)
-			   (const_int 64)))]
+  [(const_int 0)]
 {
-  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
-                                       : operands[0];
+  rtx tmp = (can_create_pseudo_p ()
+	     ? gen_reg_rtx_and_attrs (operands[0])
+	     : operands[0]);
+  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+  DONE;
 })
 
 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
@@ -851,16 +857,13 @@
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
-  [(set (match_dup 1)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 0)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))
-   (set (match_dup 1)
-	(rotate:VSX_LE_128 (match_dup 1)
-			   (const_int 64)))]
-  "")
+  [(const_int 0)]
+{
+  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+  DONE;
+})
 
 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
@@ -1198,7 +1201,7 @@
                      UNSPEC_VSX_MULSD))]
   "VECTOR_MEM_VSX_P (V2DImode)"
   "#"
-  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
+  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
   [(const_int 0)]
   "
 {
@@ -1236,7 +1239,7 @@
                      UNSPEC_VSX_DIVSD))]
   "VECTOR_MEM_VSX_P (V2DImode)"
   "#"
-  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
+  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
   [(const_int 0)]
   "
 {
@@ -1264,7 +1267,7 @@
                      UNSPEC_VSX_DIVUD))]
   "VECTOR_MEM_VSX_P (V2DImode)"
   "#"
-  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed && !reload_in_progress"
+  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
   [(const_int 0)]
   "
 {
@@ -1746,6 +1749,15 @@
   "xscvspdp %x0,%x1"
   [(set_attr "type" "fp")])
 
+;; Generate xvcvhpsp instruction
+(define_insn "vsx_xvcvhpsp"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+	(unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")]
+		     UNSPEC_VSX_CVHPSP))]
+  "TARGET_P9_VECTOR"
+  "xvcvhpsp %x0,%x1"
+  [(set_attr "type" "vecfloat")])
+
 ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
 ;; format of scalars is actually DF.
 (define_insn "vsx_xscvdpsp_scalar"
@@ -2352,10 +2364,10 @@
 
 ;; Build a V2DF/V2DI vector from two scalars
 (define_insn "vsx_concat_<mode>"
-  [(set (match_operand:VSX_D 0 "gpc_reg_operand" "=<VSa>,we")
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
 	(vec_concat:VSX_D
-	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VS_64reg>,b")
-	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VS_64reg>,b")))]
+	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
+	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
   if (which_alternative == 0)
@@ -2373,6 +2385,80 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; Combiner patterns to allow creating XXPERMDI's to access either double
+;; word element in a vector register.
+(define_insn "*vsx_concat_<mode>_1"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+	(vec_concat:VSX_D
+	 (vec_select:<VS_scalar>
+	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
+	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
+	 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+  HOST_WIDE_INT dword = INTVAL (operands[2]);
+  if (BYTES_BIG_ENDIAN)
+    {
+      operands[4] = GEN_INT (2*dword);
+      return "xxpermdi %x0,%x1,%x3,%4";
+    }
+  else
+    {
+      operands[4] = GEN_INT (!dword);
+      return "xxpermdi %x0,%x3,%x1,%4";
+    }
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "*vsx_concat_<mode>_2"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+	(vec_concat:VSX_D
+	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
+	 (vec_select:<VS_scalar>
+	  (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
+	  (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+  HOST_WIDE_INT dword = INTVAL (operands[3]);
+  if (BYTES_BIG_ENDIAN)
+    {
+      operands[4] = GEN_INT (dword);
+      return "xxpermdi %x0,%x1,%x2,%4";
+    }
+  else
+    {
+      operands[4] = GEN_INT (2 * !dword);
+      return "xxpermdi %x0,%x2,%x1,%4";
+    }
+}
+  [(set_attr "type" "vecperm")])
+
+(define_insn "*vsx_concat_<mode>_3"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+	(vec_concat:VSX_D
+	 (vec_select:<VS_scalar>
+	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
+	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
+	 (vec_select:<VS_scalar>
+	  (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
+	  (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
+  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
+  if (BYTES_BIG_ENDIAN)
+    {
+      operands[5] = GEN_INT ((2 * dword1) + dword2);
+      return "xxpermdi %x0,%x1,%x3,%5";
+    }
+  else
+    {
+      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
+      return "xxpermdi %x0,%x3,%x1,%5";
+    }
+}
+  [(set_attr "type" "vecperm")])
+
 ;; Special purpose concat using xxpermdi to glue two single precision values
 ;; together, relying on the fact that internally scalar floats are represented
 ;; as doubles.  This is used to initialize a V4SF vector with 4 floats
@@ -2573,25 +2659,35 @@
   DONE;
 })
 
-;; Set the element of a V2DI/VD2F mode
-(define_insn "vsx_set_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
-	(unspec:VSX_D
-	 [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
-	  (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
-	  (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
-	 UNSPEC_VSX_SET))]
+;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
+(define_expand "vsx_set_<mode>"
+  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
+   (use (match_operand:VSX_D 1 "vsx_register_operand"))
+   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
+   (use (match_operand:QI 3 "const_0_to_1_operand"))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
-  int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
-  if (INTVAL (operands[3]) == idx_first)
-    return \"xxpermdi %x0,%x2,%x1,1\";
-  else if (INTVAL (operands[3]) == 1 - idx_first)
-    return \"xxpermdi %x0,%x1,%x2,0\";
+  rtx dest = operands[0];
+  rtx vec_reg = operands[1];
+  rtx value = operands[2];
+  rtx ele = operands[3];
+  rtx tmp = gen_reg_rtx (<VS_scalar>mode);
+
+  if (ele == const0_rtx)
+    {
+      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
+      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
+      DONE;
+    }
+  else if (ele == const1_rtx)
+    {
+      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
+      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
+      DONE;
+    }
   else
     gcc_unreachable ();
-}
-  [(set_attr "type" "vecperm")])
+})
 
 ;; Extract a DF/DI element from V2DF/V2DI
 ;; Optimize cases were we can do a simple or direct move.
@@ -2924,7 +3020,7 @@
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
 {
   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
-  if (TARGET_VSX_SMALL_INTEGER && TARGET_P9_VECTOR)
+  if (TARGET_P9_VECTOR)
     {
       emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1],
 					    operands[2]));
@@ -2938,8 +3034,7 @@
 	 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
 	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")])))
    (clobber (match_scratch:SI 3 "=r,X"))]
-  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
-   && TARGET_VSX_SMALL_INTEGER"
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
 {
   if (which_alternative == 0)
     return "#";
@@ -2969,8 +3064,7 @@
 	 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand")
 	 (parallel [(match_operand:QI 2 "const_int_operand")])))
    (clobber (match_operand:SI 3 "int_reg_operand"))]
-  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
-   && TARGET_VSX_SMALL_INTEGER && reload_completed"
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed"
   [(const_int 0)]
 {
   rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0]));
@@ -2995,8 +3089,7 @@
 	  (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>")
 	  (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))))
    (clobber (match_scratch:SI 3 "=r,X"))]
-  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
-   && TARGET_VSX_SMALL_INTEGER"
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 4)
@@ -3016,8 +3109,7 @@
 	 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))
    (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r"))
    (clobber (match_scratch:SI 4 "=X,&r"))]
-  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB
-   && TARGET_VSX_SMALL_INTEGER"
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 3)
@@ -3034,8 +3126,7 @@
 	 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
 	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
    (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
-  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT
-   && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)"
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -3053,15 +3144,7 @@
      instruction.  */
   value = INTVAL (element);
   if (value != 1)
-    {
-      if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER)
-	{
-	  rtx si_tmp = gen_rtx_REG (SImode, REGNO (vec_tmp));
-	  emit_insn (gen_vsx_extract_v4si_p9 (si_tmp,src, element));
-	}
-      else
-	emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
-    }
+    emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element));
   else
     vec_tmp = src;
 
@@ -3070,13 +3153,13 @@
       if (can_create_pseudo_p ())
 	dest = rs6000_address_for_fpconvert (dest);
 
-      if (TARGET_VSX_SMALL_INTEGER)
+      if (TARGET_P8_VECTOR)
 	emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
       else
 	emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp))));
     }
 
-  else if (TARGET_VSX_SMALL_INTEGER)
+  else if (TARGET_P8_VECTOR)
     emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp)));
   else
     emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)),
@@ -3094,7 +3177,7 @@
 	 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")])))
    (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT
-   && (!TARGET_P9_VECTOR || !TARGET_VSX_SMALL_INTEGER)"
+   && !TARGET_P9_VECTOR"
   "#"
   "&& reload_completed"
   [(const_int 0)]
@@ -3305,7 +3388,7 @@
 	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
-   && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER"
+   && TARGET_P9_VECTOR"
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 3)
@@ -3329,7 +3412,7 @@
 	  (parallel [(match_operand:QI 2 "const_int_operand" "n")]))))
    (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))]
   "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT
-   && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER"
+   && TARGET_P9_VECTOR"
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 3)
@@ -3351,8 +3434,7 @@
 	  (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>")
 	  (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")]
 	 UNSPEC_VSX_SET))]
-  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
-   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
 {
   int ele = INTVAL (operands[3]);
   int nunits = GET_MODE_NUNITS (<MODE>mode);
@@ -3376,8 +3458,7 @@
 	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
 	 UNSPEC_VSX_SET))
    (clobber (match_scratch:SI 4 "=&wJwK"))]
-  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
-   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
+  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
   "#"
   "&& reload_completed"
   [(set (match_dup 5)
@@ -3412,8 +3493,7 @@
 	  (match_operand:QI 3 "const_0_to_3_operand" "n")]
 	 UNSPEC_VSX_SET))
    (clobber (match_scratch:SI 4 "=&wJwK"))]
-  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
-   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
+  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64"
   "#"
   "&& reload_completed"
   [(set (match_dup 4)
@@ -3443,8 +3523,7 @@
 			  [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
 	  (match_operand:QI 4 "const_0_to_3_operand" "n")]
 	 UNSPEC_VSX_SET))]
-  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
-   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64
+  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64
    && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
 {
   int ele = INTVAL (operands[4]);
@@ -3472,8 +3551,7 @@
 	 UNSPEC_VSX_SET))
    (clobber (match_scratch:SI 5 "=&wJwK"))]
   "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
-   && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
-   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64
+   && TARGET_P9_VECTOR && TARGET_POWERPC64
    && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
   "#"
   "&& 1"
@@ -4506,7 +4584,65 @@
   "xxinsertw %x0,%x1,%3"
   [(set_attr "type" "vecperm")])
 
-
+;; Generate vector extract four float 32 values from left four elements
+;; of eight element vector of float 16 values.
+(define_expand "vextract_fp_from_shorth"
+  [(set (match_operand:V4SF 0 "register_operand" "=wa")
+	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
+   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
+  "TARGET_P9_VECTOR"
+{
+  int vals[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
+  int i;
+
+  rtx rvals[16];
+  rtx mask = gen_reg_rtx (V16QImode);
+  rtx tmp = gen_reg_rtx (V16QImode);
+  rtvec v;
+
+  for (i = 0; i < 16; i++)
+    rvals[i] = GEN_INT (vals[i]);
+
+  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
+     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
+     src half words 0,1,2,3 for the conversion instruction.  */
+  v = gen_rtvec_v (16, rvals);
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
+					  operands[1], mask));
+  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
+  DONE;
+})
+
+;; Generate vector extract four float 32 values from right four elements
+;; of eight element vector of float 16 values.
+(define_expand "vextract_fp_from_shortl"
+  [(set (match_operand:V4SF 0 "register_operand" "=wa")
+	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
+	UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
+  "TARGET_P9_VECTOR"
+{
+  int vals[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
+  int i;
+  rtx rvals[16];
+  rtx mask = gen_reg_rtx (V16QImode);
+  rtx tmp = gen_reg_rtx (V16QImode);
+  rtvec v;
+
+  for (i = 0; i < 16; i++)
+    rvals[i] = GEN_INT (vals[i]);
+
+  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
+     inputs in half words 1,3,5,7 (IBM numbering).  Use xxperm to move
+     src half words 4,5,6,7 for the conversion instruction.  */
+  v = gen_rtvec_v (16, rvals);
+  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
+					  operands[1], mask));
+  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
+  DONE;
+})
+
 ;; Support for ISA 3.0 vector byte reverse
 
 ;; Swap all bytes with in a vector