28 files changed, 3511 insertions, 686 deletions
diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner
new file mode 100644
index 00000000000..d83c79fcf21
--- /dev/null
+++ b/gcc/ChangeLog.meissner
@@ -0,0 +1,227 @@
+2015-02-06  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	Merge up to ibm/gcc-4_9-branch, subversion id 220484.
+	* REVISION: Update subversion id.
+
+2014-12-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	Merge up to ibm/gcc-4_9-branch, subversion id 218646.
+	* REVISION: Update subversion id.
+
+2014-12-09  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config/rs6000/rs6000.c (rs6000_secondary_reload): Clear entire
+	secondary_reload_info structure instead of just setting a few
+	fields to 0.  Add an assertion checking that the secondary reload
+	function is in range.
+
+2014-12-05  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config/rs6000/rs6000.c (rs6000_emit_move): Do not split TFmode
+	constant moves if -mupper-regs-df.
+
+	* config/rs6000/rs6000.md (mov<mode>_64bit_dm): Optimize moving
+	0.0L to TFmode.
+	(movtd_64bit_nodm): Likewise.
+	(mov<mode>_32bit, FMOVE128 case): Likewise.
+
+2014-12-02  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	Clone branch from at 8.0 branch, subversion id 218285 (FSF
+	subversion id 217046)
+
+	* REVISION: Update file.
+
+[gcc, patch #1]
+2014-11-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config/rs6000/predicates.md (easy_fp_constant): Delete redundant
+	tests for 0.0.
+
+[gcc, patch #2]
+2014-11-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config/rs6000/vector.md (VEC_R): Move secondary reload support
+	insns to rs6000.md from vector.md.
+	(reload_<VEC_R:mode>_<P:mptrsize>_store): Likewise.
+	(reload_<VEC_R:mode>_<P:mptrsize>_load): Likewise.
+	(vec_reload_and_plus_<mptrsize>): Likewise.
+
+	* config/rs6000/rs6000.md (RELOAD): New mode iterator for all of
+	the types that have secondary reload address support to load up a
+	base register.
+	(reload_<RELOAD:mode>_<P:mptrsize>_store): Move the reload
+	handlers here from vector.md, and expand the types we generate
+	reload handlers for.
+	(reload_<RELOAD:mode>_<P:mptrsize>_load): Likewise.
+	(vec_reload_and_plus_<mptrsize>): Likewise.
+
+[gcc, patch #3]
+2014-11-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config/rs6000/vsx.md (vsx_float<VSi><mode>2): Only provide the
+	vector forms of the instructions.  Move VSX scalar forms to
+	rs6000.md, and add support for -mupper-regs-sf.
+	(vsx_floatuns<VSi><mode>2): Likewise.
+	(vsx_fix_trunc<mode><VSi>2): Likewise.
+	(vsx_fixuns_trunc<mode><VSi>2): Likewise.
+	(vsx_float_fix_<mode>2): Delete DF version, rename to
+	vsx_float_fix_v2df2.
+	(vsx_float_fix_v2df2): Likewise.
+
+	* config/rs6000/rs6000.md (Fa): New mode attribute to give
+	constraint for the Altivec registers for a type.
+	(extendsfdf2_fpr): Use correct constraint.
+	(copysign<mode>3_fcpsgn): For SFmode, use correct xscpsgndp
+	instruction.
+	(floatsi<mode>2_lfiwax): Add support for -mupper-regs-{sf,df}.
+	Generate the non-VSX instruction if all registers were FPRs.  Do
+	not use the patterns in vsx.md for scalar operations.
+	(floatsi<mode>2_lfiwax_mem): Likewise.
+	(floatunssi<mode>2_lfiwzx): Likewise.
+	(floatunssi<mode>2_lfiwzx_mem): Likewise.
+	(fix_trunc<mode>di2_fctidz): Likewise.
+	(fixuns_trunc<mode>di2_fctiduz): Likewise.
+	(fctiwz_<mode>): Likewise.
+	(fctiwuz_<mode>): Likewise.
+	(friz): Likewise.
+	(floatdidf2_fpr): Likewise.
+	(floatdidf2_mem): Likewise.
+	(floatunsdidf2): Likewise.
+	(floatunsdidf2_fcfidu): Likewise.
+	(floatunsdidf2_mem): Likewise.
+	(floatdisf2_fcfids): Likewise.
+	(floatdisf2_mem): Likewise.
+	(floatdisf2_internal1): Add explicit test for not FCFIDS to make
+	it more obvious that the code is for pre-ISA 2.06 machines.
+	(floatdisf2_internal2): Likewise.
+	(floatunsdisf2_fcfidus): Add support for -mupper-regs-{sf,df}.
+	Generate the non-VSX instruction if all registers were FPRs.  Do
+	not use the patterns in vsx.md for scalar operations.
+	(floatunsdisf2_mem): Likewise.
+
+[gcc, patch #4]
+2014-11-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config/rs6000/rs6000.c (RELOAD_REG_AND_M16): Add support for
+	Altivec style vector loads that ignore the bottom 4 bits of the
+	address.
+	(rs6000_debug_addr_mask): New function to print the addr_mask
+	values if debugging.
+	(rs6000_debug_print_mode): Call rs6000_debug_addr_mask to print
+	out addr_mask.
+	(rs6000_setup_reg_addr_masks): Add support for Altivec style
+	vector loads that ignore the bottom 4 bits of the address.
+	(rs6000_init_hard_regno_mode_ok): Rework DFmode support if
+	-mupper-regs-df.  Add support for -mupper-regs-sf.  Rearrange code
+	placement for direct move support.
+	(rs6000_option_override_internal): Add checks for -mupper-regs-df
+	requiring -mvsx, and -mupper-regs-sf requiring -mpower8-vector.
+	(rs6000_secondary_reload_fail): Add ATTRIBUTE_NORETURN.
+
+[gcc, patch #5]
+2014-11-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
+	    Ulrich Weigand  <Ulrich.Weigand@de.ibm.com>
+
+	* config/rs6000/rs6000.c (rs6000_secondary_reload_toc_costs):
+	Helper function to identify costs of a TOC load for secondary
+	reload support.
+	(rs6000_secondary_reload_memory): Helper function for secondary
+	reload, to determine if a particular memory operation is directly
+	handled by the hardware, or if it needs support from secondary
+	reload to create a valid address.
+	(rs6000_secondary_reload): Rework code, to be clearer.  If the
+	appropriate -mupper-regs-{sf,df} is used, use FPR registers to
+	reload scalar values, since the FPR registers have D-form
+	addressing. Move most of the code handling memory to the function
+	rs6000_secondary_reload_memory, and use the reg_addr structure to
+	determine what type of address modes are supported.  Print more
+	debug information if -mdebug=addr.
+	(rs6000_secondary_reload_inner): Rework entire function to be more
+	general.  Use the reg_addr bits to determine what type of
+	addressing is supported.
+	(rs6000_preferred_reload_class): Rework.  Move constant handling
+	into a single place.  Prefer using FLOAT_REGS for scalar floating
+	point.
+	(rs6000_secondary_reload_class): Use a FPR register to move a
+	value from an Altivec register to a GPR, and vice versa.  Move VSX
+	handling above traditional floating point.
+
+	* config/rs6000/rs6000.md (mov<mode>_hardfloat, FMOVE32 case):
+	Delete some spaces in the constraints.
+	(DF->DF move peephole2): Disable if -mupper-regs-{sf,df} to
+	allow using FPR registers to load/store an Altivec register for
+	scalar floating point types.
+	(SF->SF move peephole2): Likewise.
+
+[gcc, patch #6]
+2014-11-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config/rs6000/rs6000.opt (-mupper-regs-df): Make option public.
+	(-mupper-regs-sf): Likewise.
+
+	* config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define
+	__UPPER_REGS_DF__ if -mupper-regs-df.  Define __UPPER_REGS_SF__ if
+	-mupper-regs-sf.
+
+	* doc/invoke.texi (RS/6000 and PowerPC Options): Document
+	-mupper-regs-{sf,df}.
+
+[gcc, patch #8]
+2014-11-14  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config/rs6000/predicates.md (memory_fp_constant): New predicate
+	to return true if the operand is a floating point constant that
+	must be put into the constant pool, before register allocation
+	occurs.
+
+	* config/rs6000/rs6000-cpus.def (ISA_2_6_MASKS_SERVER): Enable
+	-mupper-regs-df by default.
+	(ISA_2_7_MASKS_SERVER): Enable -mupper-regs-sf by default.
+	(POWERPC_MASKS): Add -mupper-regs-{sf,df} as options set by the
+	various -mcpu=... options.
+	(power7 cpu): Enable -mupper-regs-df by default.
+
+	* config/rs6000/rs6000.opt (-mupper-regs): New combination option
+	that sets -mupper-regs-sf and -mupper-regs-df by default if the
+	cpu supports the instructions.
+
+	* config/rs6000/rs6000.c (rs6000_setup_reg_addr_masks): Allow
+	pre-increment and pre-decrement on floating point, even if the
+	-mupper-regs-{sf,df} options were used.
+	(rs6000_option_override_internal): If -mupper-regs, set both
+	-mupper-regs-sf and -mupper-regs-df, depending on the underlying
+	cpu.
+
+	* config/rs6000/rs6000.md (DFmode splitter): Add a define_split to
+	move floating point constants to the constant pool before register
+	allocation.  Normally constants are put into the pool immediately,
+	but -ffast-math delays putting them into the constant pool for the
+	reciprocal approximation support.
+	(SFmode splitter): Likewise.
+
+	* doc/invoke.texi (RS/6000 and PowerPC Options): Document
+	-mupper-regs.
+
+[pr63965 fix]
+2014-11-20  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/63965
+	* config/rs6000/rs6000.c (rs6000_setup_reg_addr_masks): Do not set
+	Altivec & -16 mask if the type is not valid for Altivec registers.
+	(rs6000_secondary_reload_memory): Add support for ((reg + const) +
+	reg) that occurs during push_reload processing.
+
+	* config/rs6000/altivec.md (altivec_mov<mode>): Add instruction
+	alternative for moving constant vectors which are easy altivec
+	constants to GPRs.  Set the length attribute for each of the
+	alternatives.
+
+[pr64019 fix]
+2014-12-01  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/64019
+	* config/rs6000/rs6000.c (rs6000_legitimize_reload_address): Do
+	not create LO_SUM address for constant addresses if the type can
+	go in Altivec registers.
+
diff --git a/gcc/REVISION b/gcc/REVISION
index dd537118185..62601b616b3 100644
--- a/gcc/REVISION
+++ b/gcc/REVISION
@@ -1 +1 @@
-[ibm/gcc-4_9-branch merged from gcc-4_9-branch, revision 220457]
+[ibm/gcc-4_9-addr merged from gcc-4_9-branch, revision 220457, merged from at8 branch 220484]
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 02ea1423782..9a2f5d764f4 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -189,8 +189,8 @@
 
 ;; Vector move instructions.
 (define_insn "*altivec_mov<mode>"
-  [(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*Y,*r,*r,v,v")
-	(match_operand:VM2 1 "input_operand" "v,Z,v,r,Y,r,j,W"))]
+  [(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*Y,*r,*r,v,v,*r")
+	(match_operand:VM2 1 "input_operand" "v,Z,v,r,Y,r,j,W,W"))]
   "VECTOR_MEM_ALTIVEC_P (<MODE>mode)
    && (register_operand (operands[0], <MODE>mode) 
        || register_operand (operands[1], <MODE>mode))"
@@ -205,10 +205,12 @@
     case 5: return "#";
     case 6: return "vxor %0,%0,%0";
     case 7: return output_vec_const_move (operands);
+    case 8: return "#";
     default: gcc_unreachable ();
     }
 }
-  [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")])
+  [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*,*")
+   (set_attr "length" "4,4,4,20,20,20,4,8,32")])
 
 ;; Unlike other altivec moves, allow the GPRs, since a normal use of TImode
 ;; is for unions.  However for plain data movement, slightly favor the vector
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 2f40462159e..cc955c52a62 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -471,10 +471,6 @@
 	      && num_insns_constant_wide ((HOST_WIDE_INT) k[3]) == 1);
 
     case DFmode:
-      /* The constant 0.f is easy under VSX.  */
-      if (op == CONST0_RTX (DFmode) && VECTOR_UNIT_VSX_P (DFmode))
-	return 1;
-
       /* Force constants to memory before reload to utilize
 	 compress_float_constant.
 	 Avoid this when flag_unsafe_math_optimizations is enabled
@@ -492,10 +488,6 @@
 	      && num_insns_constant_wide ((HOST_WIDE_INT) k[1]) == 1);
 
     case SFmode:
-      /* The constant 0.f is easy.  */
-      if (op == CONST0_RTX (SFmode))
-	return 1;
-
       /* Force constants to memory before reload to utilize
 	 compress_float_constant.
 	 Avoid this when flag_unsafe_math_optimizations is enabled
@@ -521,6 +513,27 @@
   }
 })
 
+;; Return 1 if the operand must be loaded from memory.  This is used by a
+;; define_split to ensure constants get pushed to the constant pool before
+;; reload.  If -ffast-math is used, easy_fp_constant will allow move insns to
+;; have constants in order not to interfere with reciprocal estimation, but
+;; with -mupper-regs these constants must be moved to the constant pool before
+;; register allocation.  Return 0 for a zero constant under VSX.
+
+(define_predicate "memory_fp_constant"
+  (match_code "const_double")
+{
+  if (TARGET_VSX && op == CONST0_RTX (mode))
+    return 0;
+
+  if (!TARGET_HARD_FLOAT || !TARGET_FPRS
+      || (mode == SFmode && !TARGET_SINGLE_FLOAT)
+      || (mode == DFmode && !TARGET_DOUBLE_FLOAT))
+    return 0;
+	  
+  return 1;
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 8dedeec2643..3c6e45afb08 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -362,6 +362,10 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
     rs6000_define_or_undefine_macro (define_p, "__QUAD_MEMORY_ATOMIC__");
   if ((flags & OPTION_MASK_CRYPTO) != 0)
     rs6000_define_or_undefine_macro (define_p, "__CRYPTO__");
+  if ((flags & OPTION_MASK_UPPER_REGS_DF) != 0)
+    rs6000_define_or_undefine_macro (define_p, "__UPPER_REGS_DF__");
+  if ((flags & OPTION_MASK_UPPER_REGS_SF) != 0)
+    rs6000_define_or_undefine_macro (define_p, "__UPPER_REGS_SF__");
 
   /* options from the builtin masks.  */
   if ((bu_mask & RS6000_BTM_SPE) != 0)
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index ba56df83dd1..17f5a571383 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -44,7 +44,8 @@
 #define ISA_2_6_MASKS_SERVER	(ISA_2_5_MASKS_SERVER			\
 				 | OPTION_MASK_POPCNTD			\
 				 | OPTION_MASK_ALTIVEC			\
-				 | OPTION_MASK_VSX)
+				 | OPTION_MASK_VSX			\
+				 | OPTION_MASK_UPPER_REGS_DF)
 
 /* For now, don't provide an embedded version of ISA 2.07.  */
 #define ISA_2_7_MASKS_SERVER	(ISA_2_6_MASKS_SERVER			\
@@ -54,7 +55,8 @@
 				 | OPTION_MASK_DIRECT_MOVE		\
 				 | OPTION_MASK_HTM			\
 				 | OPTION_MASK_QUAD_MEMORY		\
-  				 | OPTION_MASK_QUAD_MEMORY_ATOMIC)
+  				 | OPTION_MASK_QUAD_MEMORY_ATOMIC	\
+				 | OPTION_MASK_UPPER_REGS_SF)
 
 #define POWERPC_7400_MASK	(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
 
@@ -95,6 +97,8 @@
 				 | OPTION_MASK_RECIP_PRECISION		\
 				 | OPTION_MASK_SOFT_FLOAT		\
 				 | OPTION_MASK_STRICT_ALIGN_OPTIONAL	\
+				 | OPTION_MASK_UPPER_REGS_DF		\
+				 | OPTION_MASK_UPPER_REGS_SF		\
 				 | OPTION_MASK_VSX			\
 				 | OPTION_MASK_VSX_TIMODE)
 
@@ -185,7 +189,7 @@ RS6000_CPU ("power6x", PROCESSOR_POWER6, MASK_POWERPC64 | MASK_PPC_GPOPT
 RS6000_CPU ("power7", PROCESSOR_POWER7,   /* Don't add MASK_ISEL by default */
 	    POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
 	    | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
-	    | MASK_VSX | MASK_RECIP_PRECISION)
+	    | MASK_VSX | MASK_RECIP_PRECISION | OPTION_MASK_UPPER_REGS_DF)
 RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER)
 RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0)
 RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1917cdc97ab..b5b18411f85 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -383,6 +383,7 @@ typedef unsigned char addr_mask_type;
 #define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing. */
 #define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
 #define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
+#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
 
 /* Register type masks based on the type, of valid addressing modes.  */
 struct rs6000_reg_addr {
@@ -1904,6 +1905,54 @@ rs6000_debug_vector_unit (enum rs6000_vector v)
   return ret;
 }
 
+/* Format the address mask MASK of one reload register class into a static
+   buffer; if KEEP_SPACES, a clear flag yields ' ' rather than nothing.  */
+DEBUG_FUNCTION char *
+rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
+{
+  static char ret[8];	/* Up to seven flag characters plus the NUL.  */
+  char *p = ret;
+
+  if ((mask & RELOAD_REG_VALID) != 0)
+    *p++ = 'v';
+  else if (keep_spaces)
+    *p++ = ' ';
+
+  if ((mask & RELOAD_REG_MULTIPLE) != 0)
+    *p++ = 'm';
+  else if (keep_spaces)
+    *p++ = ' ';
+
+  if ((mask & RELOAD_REG_INDEXED) != 0)
+    *p++ = 'i';
+  else if (keep_spaces)
+    *p++ = ' ';
+
+  if ((mask & RELOAD_REG_OFFSET) != 0)
+    *p++ = 'o';
+  else if (keep_spaces)
+    *p++ = ' ';
+
+  if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
+    *p++ = '+';
+  else if (keep_spaces)
+    *p++ = ' ';
+
+  if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
+    *p++ = '+';		/* Same letter as PRE_INCDEC above.  */
+  else if (keep_spaces)
+    *p++ = ' ';
+
+  if ((mask & RELOAD_REG_AND_M16) != 0)
+    *p++ = '&';
+  else if (keep_spaces)
+    *p++ = ' ';
+
+  *p = '\0';
+
+  return ret;
+}
+
 /* Print the address masks in a human readble fashion.  */
 DEBUG_FUNCTION void
 rs6000_debug_print_mode (ssize_t m)
@@ -1912,18 +1961,8 @@ rs6000_debug_print_mode (ssize_t m)
 
   fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
   for (rc = 0; rc < N_RELOAD_REG; rc++)
-    {
-      addr_mask_type mask = reg_addr[m].addr_mask[rc];
-      fprintf (stderr,
-	       "  %s: %c%c%c%c%c%c",
-	       reload_reg_map[rc].name,
-	       (mask & RELOAD_REG_VALID)      != 0 ? 'v' : ' ',
-	       (mask & RELOAD_REG_MULTIPLE)   != 0 ? 'm' : ' ',
-	       (mask & RELOAD_REG_INDEXED)    != 0 ? 'i' : ' ',
-	       (mask & RELOAD_REG_OFFSET)     != 0 ? 'o' : ' ',
-	       (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ',
-	       (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' ');
-    }
+    fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
+	     rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
 
   if (rs6000_vector_unit[m] != VECTOR_NONE
       || rs6000_vector_mem[m] != VECTOR_NONE
@@ -2399,9 +2438,7 @@ rs6000_setup_reg_addr_masks (void)
 	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
 		 addressing.  Restrict addressing on SPE for 64-bit types
 		 because of the SUBREG hackery used to address 64-bit floats in
-		 '32-bit' GPRs.  To simplify secondary reload, don't allow
-		 update forms on scalar floating point types that can go in the
-		 upper registers.  */
+		 '32-bit' GPRs.  */
 
 	      if (TARGET_UPDATE
 		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
@@ -2409,8 +2446,7 @@ rs6000_setup_reg_addr_masks (void)
 		  && !VECTOR_MODE_P (m2)
 		  && !COMPLEX_MODE_P (m2)
 		  && !indexed_only_p
-		  && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8)
-		  && !reg_addr[m2].scalar_in_vmx_p)
+		  && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
 		{
 		  addr_mask |= RELOAD_REG_PRE_INCDEC;
 
@@ -2443,6 +2479,12 @@ rs6000_setup_reg_addr_masks (void)
 	      && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
 	    addr_mask |= RELOAD_REG_OFFSET;
 
+	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
+	     addressing on 128-bit types.  */
+	  if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
+	      && (addr_mask & RELOAD_REG_VALID) != 0)
+	    addr_mask |= RELOAD_REG_AND_M16;
+
 	  reg_addr[m].addr_mask[rc] = addr_mask;
 	  any_addr_mask |= addr_mask;
 	}
@@ -2609,13 +2651,19 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
       rs6000_vector_align[V1TImode] = 128;
     }
 
-  /* DFmode, see if we want to use the VSX unit.  */
+  /* DFmode, see if we want to use the VSX unit.  Memory is handled
+     differently, so don't set rs6000_vector_mem.  */
   if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
     {
       rs6000_vector_unit[DFmode] = VECTOR_VSX;
-      rs6000_vector_mem[DFmode]
-	= (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE);
-      rs6000_vector_align[DFmode] = align64;
+      rs6000_vector_align[DFmode] = 64;
+    }
+
+  /* SFmode, see if we want to use the VSX unit.  */
+  if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
+    {
+      rs6000_vector_unit[SFmode] = VECTOR_VSX;
+      rs6000_vector_align[SFmode] = 32;
     }
 
   /* Allow TImode in VSX register and set the VSX memory macros.  */
@@ -2750,58 +2798,42 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_di_load;
 	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_di_store;
 	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_di_load;
-	  if (TARGET_VSX && TARGET_UPPER_REGS_DF)
-	    {
-	      reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_di_store;
-	      reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_di_load;
-	      reg_addr[DFmode].scalar_in_vmx_p = true;
-	      reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_di_store;
-	      reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_di_load;
-	    }
-	  if (TARGET_P8_VECTOR)
-	    {
-	      reg_addr[SFmode].reload_store  = CODE_FOR_reload_sf_di_store;
-	      reg_addr[SFmode].reload_load   = CODE_FOR_reload_sf_di_load;
-	      reg_addr[SDmode].reload_store  = CODE_FOR_reload_sd_di_store;
-	      reg_addr[SDmode].reload_load   = CODE_FOR_reload_sd_di_load;
-	      if (TARGET_UPPER_REGS_SF)
-		reg_addr[SFmode].scalar_in_vmx_p = true;
-	    }
+	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_di_store;
+	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_di_load;
+	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_di_store;
+	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_di_load;
+	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_di_store;
+	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_di_load;
+	  reg_addr[SDmode].reload_store    = CODE_FOR_reload_sd_di_store;
+	  reg_addr[SDmode].reload_load     = CODE_FOR_reload_sd_di_load;
+
 	  if (TARGET_VSX_TIMODE)
 	    {
 	      reg_addr[TImode].reload_store  = CODE_FOR_reload_ti_di_store;
 	      reg_addr[TImode].reload_load   = CODE_FOR_reload_ti_di_load;
 	    }
+
 	  if (TARGET_DIRECT_MOVE)
 	    {
-	      if (TARGET_POWERPC64)
-		{
-		  reg_addr[TImode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxti;
-		  reg_addr[V1TImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv1ti;
-		  reg_addr[V2DFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2df;
-		  reg_addr[V2DImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2di;
-		  reg_addr[V4SFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4sf;
-		  reg_addr[V4SImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4si;
-		  reg_addr[V8HImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv8hi;
-		  reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
-		  reg_addr[SFmode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxsf;
-
-		  reg_addr[TImode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprti;
-		  reg_addr[V1TImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv1ti;
-		  reg_addr[V2DFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2df;
-		  reg_addr[V2DImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2di;
-		  reg_addr[V4SFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4sf;
-		  reg_addr[V4SImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4si;
-		  reg_addr[V8HImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv8hi;
-		  reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
-		  reg_addr[SFmode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprsf;
-		}
-	      else
-		{
-		  reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
-		  reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
-		  reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
-		}
+	      reg_addr[TImode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxti;
+	      reg_addr[V1TImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv1ti;
+	      reg_addr[V2DFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2df;
+	      reg_addr[V2DImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2di;
+	      reg_addr[V4SFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4sf;
+	      reg_addr[V4SImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4si;
+	      reg_addr[V8HImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv8hi;
+	      reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
+	      reg_addr[SFmode].reload_gpr_vsx    = CODE_FOR_reload_gpr_from_vsxsf;
+
+	      reg_addr[TImode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprti;
+	      reg_addr[V1TImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv1ti;
+	      reg_addr[V2DFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2df;
+	      reg_addr[V2DImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2di;
+	      reg_addr[V4SFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4sf;
+	      reg_addr[V4SImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4si;
+	      reg_addr[V8HImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv8hi;
+	      reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
+	      reg_addr[SFmode].reload_vsx_gpr    = CODE_FOR_reload_vsx_from_gprsf;
 	    }
 	}
       else
@@ -2820,29 +2852,34 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
 	  reg_addr[V4SFmode].reload_load   = CODE_FOR_reload_v4sf_si_load;
 	  reg_addr[V2DFmode].reload_store  = CODE_FOR_reload_v2df_si_store;
 	  reg_addr[V2DFmode].reload_load   = CODE_FOR_reload_v2df_si_load;
-	  if (TARGET_VSX && TARGET_UPPER_REGS_DF)
-	    {
-	      reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_si_store;
-	      reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_si_load;
-	      reg_addr[DFmode].scalar_in_vmx_p = true;
-	      reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_si_store;
-	      reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_si_load;
-	    }
-	  if (TARGET_P8_VECTOR)
-	    {
-	      reg_addr[SFmode].reload_store  = CODE_FOR_reload_sf_si_store;
-	      reg_addr[SFmode].reload_load   = CODE_FOR_reload_sf_si_load;
-	      reg_addr[SDmode].reload_store  = CODE_FOR_reload_sd_si_store;
-	      reg_addr[SDmode].reload_load   = CODE_FOR_reload_sd_si_load;
-	      if (TARGET_UPPER_REGS_SF)
-		reg_addr[SFmode].scalar_in_vmx_p = true;
-	    }
+	  reg_addr[DFmode].reload_store    = CODE_FOR_reload_df_si_store;
+	  reg_addr[DFmode].reload_load     = CODE_FOR_reload_df_si_load;
+	  reg_addr[DDmode].reload_store    = CODE_FOR_reload_dd_si_store;
+	  reg_addr[DDmode].reload_load     = CODE_FOR_reload_dd_si_load;
+	  reg_addr[SFmode].reload_store    = CODE_FOR_reload_sf_si_store;
+	  reg_addr[SFmode].reload_load     = CODE_FOR_reload_sf_si_load;
+	  reg_addr[SDmode].reload_store    = CODE_FOR_reload_sd_si_store;
+	  reg_addr[SDmode].reload_load     = CODE_FOR_reload_sd_si_load;
+
 	  if (TARGET_VSX_TIMODE)
 	    {
 	      reg_addr[TImode].reload_store  = CODE_FOR_reload_ti_si_store;
 	      reg_addr[TImode].reload_load   = CODE_FOR_reload_ti_si_load;
 	    }
+
+	  if (TARGET_DIRECT_MOVE)
+	    {
+	      reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
+	      reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
+	      reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
+	    }
 	}
+
+      if (TARGET_UPPER_REGS_DF)
+	reg_addr[DFmode].scalar_in_vmx_p = true;
+
+      if (TARGET_UPPER_REGS_SF)
+	reg_addr[SFmode].scalar_in_vmx_p = true;
     }
 
   /* Precalculate HARD_REGNO_NREGS.  */
@@ -3446,6 +3483,54 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_isa_flags &= ~OPTION_MASK_DFP;
     }
 
+  /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
+     -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
+     the individual option.  */
+  if (TARGET_UPPER_REGS > 0)
+    {
+      if (TARGET_VSX
+	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
+	{
+	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
+	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
+	}
+      if (TARGET_P8_VECTOR
+	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
+	{
+	  rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
+	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
+	}
+    }
+  else if (TARGET_UPPER_REGS == 0)
+    {
+      if (TARGET_VSX
+	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
+	{
+	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
+	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
+	}
+      if (TARGET_P8_VECTOR
+	  && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
+	{
+	  rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
+	  rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
+	}
+    }
+
+  if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
+    {
+      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
+	error ("-mupper-regs-df requires -mvsx");
+      rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
+    }
+
+  if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
+    {
+      if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
+	error ("-mupper-regs-sf requires -mpower8-vector");
+      rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
+    }
+
   /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
      silently turn off quad memory mode.  */
   if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
@@ -7472,7 +7557,11 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
 	 naturally aligned.  Since we say the address is good here, we
 	 can't disable offsets from LO_SUMs in mem_operand_gpr.
 	 FIXME: Allow offset from lo_sum for other modes too, when
-	 mem is sufficiently aligned.  */
+	 mem is sufficiently aligned.
+
+	 Also disallow this if the type can go in VMX/Altivec registers, since
+	 those registers do not have d-form (reg+offset) address modes.  */
+      && !reg_addr[mode].scalar_in_vmx_p
       && mode != TFmode
       && mode != TDmode
       && (mode != TImode || !TARGET_VSX_TIMODE)
@@ -8304,9 +8393,11 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
 	  || ! nonimmediate_operand (operands[0], mode)))
     goto emit_set;
 
-  /* 128-bit constant floating-point values on Darwin should really be
-     loaded as two parts.  */
+  /* 128-bit constant floating-point values on Darwin should really be loaded
+     as two parts.  However, this premature splitting is a problem when DFmode
+     values can go into Altivec registers.  */
   if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
+      && !reg_addr[DFmode].scalar_in_vmx_p
       && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
     {
       rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
@@ -8325,6 +8416,30 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
       eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
 
 
+  /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
+     p1:SD) if p1 is not of floating point class and p0 is spilled as
+     we can have no analogous movsd_store for this.  */
+  if (lra_in_progress && mode == DDmode
+      && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
+      && reg_preferred_class (REGNO (operands[0])) == NO_REGS
+      && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
+      && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
+    {
+      enum reg_class cl;
+      int regno = REGNO (SUBREG_REG (operands[1]));
+
+      if (regno >= FIRST_PSEUDO_REGISTER)
+	{
+	  cl = reg_preferred_class (regno);
+	  regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
+	}
+      if (regno >= 0 && ! FP_REGNO_P (regno))
+	{
+	  mode = SDmode;
+	  operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
+	  operands[1] = SUBREG_REG (operands[1]);
+	}
+    }
   if (lra_in_progress
       && mode == SDmode
       && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
@@ -8355,6 +8470,30 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
 	gcc_unreachable();
       return;
     }
+  /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
+     p1:DD)) if p0 is not of floating point class and p1 is spilled as
+     we can have no analogous movsd_load for this.  */
+  if (lra_in_progress && mode == DDmode
+      && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
+      && GET_MODE (SUBREG_REG (operands[0])) == SDmode
+      && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
+      && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
+    {
+      enum reg_class cl;
+      int regno = REGNO (SUBREG_REG (operands[0]));
+
+      if (regno >= FIRST_PSEUDO_REGISTER)
+	{
+	  cl = reg_preferred_class (regno);
+	  regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
+	}
+      if (regno >= 0 && ! FP_REGNO_P (regno))
+	{
+	  mode = SDmode;
+	  operands[0] = SUBREG_REG (operands[0]);
+	  operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
+	}
+    }
   if (lra_in_progress
       && mode == SDmode
       && (REG_P (operands[0])
@@ -16281,6 +16420,289 @@ register_to_reg_type (rtx reg, bool *is_altivec)
   return reg_class_to_reg_type[(int)rclass];
 }
 
+/* Helper function to return the cost of adding a TOC entry address.
+   ADDR_MASK is the addressing mask of the register class and mode being
+   reloaded.  */
+
+static inline int
+rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
+{
+  /* Small code model: the cost depends only on TARGET_MINIMAL_TOC.  */
+  if (TARGET_CMODEL == CMODEL_SMALL)
+    return (TARGET_MINIMAL_TOC) ? 6 : 3;
+
+  /* Other code models: cost 2 when the class can use reg+offset addressing
+     for this mode, 1 when it cannot.  */
+  return ((addr_mask & RELOAD_REG_OFFSET) != 0) ? 2 : 1;
+}
+
+/* Helper function for rs6000_secondary_reload to determine whether the memory
+   address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
+   needs reloading.  Return negative if the memory is not handled by the memory
+   helper functions and to try a different reload method, 0 if no additional
+   instructions are needed, and positive to give the extra cost for the
+   memory.  */
+
+static int
+rs6000_secondary_reload_memory (rtx addr,
+				enum reg_class rclass,
+				enum machine_mode mode)
+{
+  int extra_cost = 0;
+  rtx reg, and_arg, plus_arg0, plus_arg1;
+  addr_mask_type addr_mask;
+  const char *type = NULL;
+  const char *fail_msg = NULL;
+
+  if (GPR_REG_CLASS_P (rclass))
+    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
+
+  else if (rclass == FLOAT_REGS)
+    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
+
+  else if (rclass == ALTIVEC_REGS)
+    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
+
+  /* For the combined VSX_REGS, turn off Altivec AND -16.  */
+  else if (rclass == VSX_REGS)
+    addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
+		 & ~RELOAD_REG_AND_M16);
+
+  else
+    {
+      if (TARGET_DEBUG_ADDR)
+	fprintf (stderr,
+		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
+		 "class is not GPR, FPR, VMX\n",
+		 GET_MODE_NAME (mode), reg_class_names[rclass]);
+
+      return -1;
+    }
+
+  /* If the register isn't valid in this register class, just return now.  */
+  if ((addr_mask & RELOAD_REG_VALID) == 0)
+    {
+      if (TARGET_DEBUG_ADDR)
+	fprintf (stderr,
+		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
+		 "not valid in class\n",
+		 GET_MODE_NAME (mode), reg_class_names[rclass]);
+
+      return -1;
+    }
+
+  switch (GET_CODE (addr))
+    {
+      /* Does the register class support auto update forms for this mode?  We
+	 don't need a scratch register, since the powerpc only supports
+	 PRE_INC, PRE_DEC, and PRE_MODIFY.
+	 NOTE(review): the check below passes ADDR, not REG, unlike the
+	 PRE_MODIFY case; confirm which is intended before changing it.  */
+    case PRE_INC:
+    case PRE_DEC:
+      reg = XEXP (addr, 0);
+      if (!base_reg_operand (addr, GET_MODE (reg)))
+	{
+	  fail_msg = "no base register #1";
+	  extra_cost = -1;
+	}
+
+      else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
+	{
+	  extra_cost = 1;
+	  type = "update";
+	}
+      break;
+
+    case PRE_MODIFY:
+      reg = XEXP (addr, 0);
+      plus_arg1 = XEXP (addr, 1);
+      if (!base_reg_operand (reg, GET_MODE (reg))
+	  || GET_CODE (plus_arg1) != PLUS
+	  || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
+	{
+	  fail_msg = "bad PRE_MODIFY";
+	  extra_cost = -1;
+	}
+
+      else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
+	{
+	  extra_cost = 1;
+	  type = "update";
+	}
+      break;
+
+      /* Do we need to simulate AND -16 to clear the bottom address bits used
+	 in VMX load/stores?  Only allow the AND for vector sizes.  */
+    case AND:
+      and_arg = XEXP (addr, 0);
+      if (GET_MODE_SIZE (mode) != 16
+	  || GET_CODE (XEXP (addr, 1)) != CONST_INT
+	  || INTVAL (XEXP (addr, 1)) != -16)
+	{
+	  fail_msg = "bad Altivec AND #1";
+	  extra_cost = -1;
+	}
+
+      else if (rclass != ALTIVEC_REGS)
+	{
+	  if (legitimate_indirect_address_p (and_arg, false))
+	    extra_cost = 1;
+
+	  else if (legitimate_indexed_address_p (and_arg, false))
+	    extra_cost = 2;
+
+	  else
+	    {
+	      fail_msg = "bad Altivec AND #2";
+	      extra_cost = -1;
+	    }
+
+	  type = "and";
+	}
+      break;
+
+      /* If this is an indirect address, make sure it is a base register.  */
+    case REG:
+    case SUBREG:
+      if (!legitimate_indirect_address_p (addr, false))
+	{
+	  extra_cost = 1;
+	  type = "move";
+	}
+      break;
+
+      /* If this is an indexed address, make sure the register class can handle
+	 indexed addresses for this mode.  */
+    case PLUS:
+      plus_arg0 = XEXP (addr, 0);
+      plus_arg1 = XEXP (addr, 1);
+
+      /* (plus (plus (reg) (constant)) (constant)) is generated during
+	 push_reload processing, so handle it now.  */
+      if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
+	{
+	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
+	    {
+	      extra_cost = 1;
+	      type = "offset";
+	    }
+	}
+
+      /* (plus (plus (reg) (constant)) (reg)) is also generated during
+	 push_reload processing, so handle it now.  */
+      else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
+	{
+	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
+	    {
+	      extra_cost = 1;
+	      type = "indexed #2";
+	    }
+	}
+
+      else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
+	{
+	  fail_msg = "no base register #2";
+	  extra_cost = -1;
+	}
+
+      else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
+	{
+	  if ((addr_mask & RELOAD_REG_INDEXED) == 0
+	      || !legitimate_indexed_address_p (addr, false))
+	    {
+	      extra_cost = 1;
+	      type = "indexed";
+	    }
+	}
+
+      /* Make sure the register class can handle offset addresses.  */
+      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
+	{
+	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
+	    {
+	      extra_cost = 1;
+	      type = "offset";
+	    }
+	}
+
+      else
+	{
+	  fail_msg = "bad PLUS";
+	  extra_cost = -1;
+	}
+
+      break;
+
+    case LO_SUM:
+      if (!legitimate_lo_sum_address_p (mode, addr, false))
+	{
+	  fail_msg = "bad LO_SUM";
+	  extra_cost = -1;
+	}
+
+      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
+	{
+	  extra_cost = 1;
+	  type = "lo_sum";
+	}
+      break;
+
+      /* Static addresses need to create a TOC entry.  */
+    case CONST:
+    case SYMBOL_REF:
+    case LABEL_REF:
+      type = "address";
+      extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
+      break;
+
+      /* TOC references look like offsetable memory.  */
+    case UNSPEC:
+      if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
+	{
+	  fail_msg = "bad UNSPEC";
+	  extra_cost = -1;
+	}
+
+      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
+	{
+	  extra_cost = 1;
+	  type = "toc reference";
+	}
+      break;
+
+    default:
+      fail_msg = "bad address";
+      extra_cost = -1;
+    }
+
+  if (TARGET_DEBUG_ADDR)
+    {
+      if (extra_cost < 0)
+	fprintf (stderr,
+		 "rs6000_secondary_reload_memory error: mode = %s, "
+		 "class = %s, addr_mask = '%s', %s\n",
+		 GET_MODE_NAME (mode),
+		 reg_class_names[rclass],
+		 rs6000_debug_addr_mask (addr_mask, false),
+		 (fail_msg != NULL) ? fail_msg : "<bad address>");
+
+      else
+	fprintf (stderr,
+		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
+		 "addr_mask = '%s', extra cost = %d, %s\n",
+		 GET_MODE_NAME (mode),
+		 reg_class_names[rclass],
+		 rs6000_debug_addr_mask (addr_mask, false),
+		 extra_cost,
+		 (type) ? type : "<none>");
+
+      debug_rtx (addr);
+    }
+
+  return extra_cost;
+}
+
 /* Helper function for rs6000_secondary_reload to return true if a move to a
    different register classe is really a simple move.  */
 
@@ -16434,6 +16856,13 @@ rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
 			      secondary_reload_info *sri,
 			      bool altivec_p)
 {
+  /* Make sure sri is setup if passed.  */
+  if (sri)
+    {
+      sri->icode = CODE_FOR_nothing;
+      sri->extra_cost = 0;
+    }
+
   /* Fall back to load/store reloads if either type is not a register.  */
   if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
     return false;
@@ -16452,14 +16881,7 @@ rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
 
   /* Check whether a simple move can be done directly.  */
   if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
-    {
-      if (sri)
-	{
-	  sri->icode = CODE_FOR_nothing;
-	  sri->extra_cost = 0;
-	}
-      return true;
-    }
+    return true;
 
   /* Now check if we can do it in a few steps.  */
   return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
@@ -16487,8 +16909,17 @@ rs6000_secondary_reload (bool in_p,
   reg_class_t ret = ALL_REGS;
   enum insn_code icode;
   bool default_p = false;
+  bool done_p = false;
+
+  /* Allow subreg of memory before/during reload.  */
+  bool memory_p = (MEM_P (x)
+		   || (!reload_completed && GET_CODE (x) == SUBREG
+		       && MEM_P (SUBREG_REG (x))));
 
+  /* Set the secondary reload structure to a known state.  */
+  memset ((void *)sri, '\0', sizeof (secondary_reload_info));
   sri->icode = CODE_FOR_nothing;
+
   icode = ((in_p)
 	   ? reg_addr[mode].reload_load
 	   : reg_addr[mode].reload_store);
@@ -16512,121 +16943,54 @@ rs6000_secondary_reload (bool in_p,
 	{
 	  icode = (enum insn_code)sri->icode;
 	  default_p = false;
+	  done_p = true;
 	  ret = NO_REGS;
 	}
     }
 
-  /* Handle vector moves with reload helper functions.  */
-  if (ret == ALL_REGS && icode != CODE_FOR_nothing)
+  /* Make sure 0.0 is not reloaded or forced into memory.  */
+  if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
     {
       ret = NO_REGS;
-      sri->icode = CODE_FOR_nothing;
-      sri->extra_cost = 0;
+      default_p = false;
+      done_p = true;
+    }
 
-      if (GET_CODE (x) == MEM)
-	{
-	  rtx addr = XEXP (x, 0);
+  /* If this is a scalar floating point value and we want to load it into the
+     traditional Altivec registers, do it via a move via a traditional floating
+     point register.  Also make sure that non-zero constants use a FPR.  */
+  if (!done_p && reg_addr[mode].scalar_in_vmx_p
+      && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
+      && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
+    {
+      ret = FLOAT_REGS;
+      default_p = false;
+      done_p = true;
+    }
 
-	  /* Loads to and stores from gprs can do reg+offset, and wouldn't need
-	     an extra register in that case, but it would need an extra
-	     register if the addressing is reg+reg or (reg+reg)&(-16).  Special
-	     case load/store quad.  */
-	  if (rclass == GENERAL_REGS || rclass == BASE_REGS)
-	    {
-	      if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
-		  && GET_MODE_SIZE (mode) == 16
-		  && quad_memory_operand (x, mode))
-		{
-		  sri->icode = icode;
-		  sri->extra_cost = 2;
-		}
+  /* Handle reload of load/stores if we have reload helper functions.  */
+  if (!done_p && icode != CODE_FOR_nothing && memory_p)
+    {
+      int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
+						       mode);
 
-	      else if (!legitimate_indirect_address_p (addr, false)
-		       && !rs6000_legitimate_offset_address_p (PTImode, addr,
-							       false, true))
-		{
-		  sri->icode = icode;
-		  /* account for splitting the loads, and converting the
-		     address from reg+reg to reg.  */
-		  sri->extra_cost = (((TARGET_64BIT) ? 3 : 5)
-				     + ((GET_CODE (addr) == AND) ? 1 : 0));
-		}
-	    }
-         /* Allow scalar loads to/from the traditional floating point
-            registers, even if VSX memory is set.  */
-         else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
-                  && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
-                  && (legitimate_indirect_address_p (addr, false)
-                      || legitimate_indirect_address_p (addr, false)
-                      || rs6000_legitimate_offset_address_p (mode, addr,
-                                                             false, true)))
-
-           ;
-         /* Loads to and stores from vector registers can only do reg+reg
-            addressing.  Altivec registers can also do (reg+reg)&(-16).  Allow
-            scalar modes loading up the traditional floating point registers
-            to use offset addresses.  */
-	  else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS
-		   || rclass == FLOAT_REGS || rclass == NO_REGS)
-	    {
-	      if (!VECTOR_MEM_ALTIVEC_P (mode)
-		  && GET_CODE (addr) == AND
-		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
-		  && INTVAL (XEXP (addr, 1)) == -16
-		  && (legitimate_indirect_address_p (XEXP (addr, 0), false)
-		      || legitimate_indexed_address_p (XEXP (addr, 0), false)))
-		{
-		  sri->icode = icode;
-		  sri->extra_cost = ((GET_CODE (XEXP (addr, 0)) == PLUS)
-				     ? 2 : 1);
-		}
-	      else if (!legitimate_indirect_address_p (addr, false)
-		       && (rclass == NO_REGS
-			   || !legitimate_indexed_address_p (addr, false)))
-		{
-		  sri->icode = icode;
-		  sri->extra_cost = 1;
-		}
-	      else
-		icode = CODE_FOR_nothing;
-	    }
-	  /* Any other loads, including to pseudo registers which haven't been
-	     assigned to a register yet, default to require a scratch
-	     register.  */
-	  else
-	    {
-	      sri->icode = icode;
-	      sri->extra_cost = 2;
-	    }
-	}
-      else if (REG_P (x))
+      if (extra_cost >= 0)
 	{
-	  int regno = true_regnum (x);
-
-	  icode = CODE_FOR_nothing;
-	  if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
-	    default_p = true;
-	  else
+	  done_p = true;
+	  ret = NO_REGS;
+	  if (extra_cost > 0)
 	    {
-	      enum reg_class xclass = REGNO_REG_CLASS (regno);
-	      enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
-	      enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
-
-	      /* If memory is needed, use default_secondary_reload to create the
-		 stack slot.  */
-	      if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
-		default_p = true;
-	      else
-		ret = NO_REGS;
+	      sri->extra_cost = extra_cost;
+	      sri->icode = icode;
 	    }
 	}
-      else
-	default_p = true;
     }
-  else if (TARGET_POWERPC64
-	   && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
-	   && MEM_P (x)
-	   && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
+
+  /* Handle unaligned loads and stores of integer registers.  */
+  if (!done_p && TARGET_POWERPC64
+      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
+      && memory_p
+      && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
     {
       rtx addr = XEXP (x, 0);
       rtx off = address_offset (addr);
@@ -16658,6 +17022,7 @@ rs6000_secondary_reload (bool in_p,
 			      : CODE_FOR_reload_di_store);
 	      sri->extra_cost = 2;
 	      ret = NO_REGS;
+	      done_p = true;
 	    }
 	  else
 	    default_p = true;
@@ -16665,10 +17030,11 @@ rs6000_secondary_reload (bool in_p,
       else
 	default_p = true;
     }
-  else if (!TARGET_POWERPC64
-	   && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
-	   && MEM_P (x)
-	   && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
+
+  if (!done_p && !TARGET_POWERPC64
+      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
+      && memory_p
+      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
     {
       rtx addr = XEXP (x, 0);
       rtx off = address_offset (addr);
@@ -16704,6 +17070,7 @@ rs6000_secondary_reload (bool in_p,
 		sri->icode = CODE_FOR_reload_si_store;
 	      sri->extra_cost = 2;
 	      ret = NO_REGS;
+	      done_p = true;
 	    }
 	  else
 	    default_p = true;
@@ -16711,13 +17078,28 @@ rs6000_secondary_reload (bool in_p,
       else
 	default_p = true;
     }
-  else
+
+  if (!done_p)
     default_p = true;
 
   if (default_p)
-    ret = default_secondary_reload (in_p, x, rclass, mode, sri);
+    {
+      ret = default_secondary_reload (in_p, x, rclass, mode, sri);
+
+      if (!IN_RANGE (sri->icode, CODE_FOR_nothing, LAST_INSN_CODE))
+	{
+	  fprintf (stderr,
+		   "default_secondary_reload failure, ret = %s, rclass = %s, mode = %s, in_p = %s\n",
+		   reg_class_names[ret],
+		   reg_class_names[rclass],
+		   GET_MODE_NAME (mode),
+		   in_p ? "true" : "false");
+	  debug_rtx (x);
+	}
+    }
 
   gcc_assert (ret != ALL_REGS);
+  gcc_assert (IN_RANGE (sri->icode, CODE_FOR_nothing, LAST_INSN_CODE));
 
   if (TARGET_DEBUG_ADDR)
     {
@@ -16729,15 +17111,20 @@ rs6000_secondary_reload (bool in_p,
 	       reg_class_names[rclass],
 	       GET_MODE_NAME (mode));
 
+      if (reload_completed)
+	fputs (", after reload", stderr);
+
+      if (!done_p)
+	fputs (", done_p not set", stderr);
+
       if (default_p)
-	fprintf (stderr, ", default secondary reload");
+	fputs (", default secondary reload", stderr);
 
       if (sri->icode != CODE_FOR_nothing)
-	fprintf (stderr, ", reload func = %s, extra cost = %d\n",
+	fprintf (stderr, ", reload func = %s, extra cost = %d",
 		 insn_data[sri->icode].name, sri->extra_cost);
-      else
-	fprintf (stderr, "\n");
 
+      fputs ("\n", stderr);
       debug_rtx (x);
     }
 
@@ -16766,6 +17153,9 @@ rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
   debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
 }
 
+static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
+  ATTRIBUTE_NORETURN;
+
 static void
 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
 			      bool store_p)
@@ -16774,209 +17164,148 @@ rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
   gcc_unreachable ();
 }
 
-/* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset
-   to SP+reg addressing.  */
+/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
+   reload helper functions.  These were identified in
+   rs6000_secondary_reload_memory, and if reload decided to use the secondary
+   reload, it calls the insns:
+	reload_<RELOAD:mode>_<P:mptrsize>_store
+	reload_<RELOAD:mode>_<P:mptrsize>_load
+
+   which in turn calls this function, to do whatever is necessary to create
+   valid addresses.  */
 
 void
 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
 {
   int regno = true_regnum (reg);
-  enum machine_mode mode = GET_MODE (reg);
-  enum reg_class rclass;
+  machine_mode mode = GET_MODE (reg);
+  addr_mask_type addr_mask;
   rtx addr;
-  rtx and_op2 = NULL_RTX;
-  rtx addr_op1;
-  rtx addr_op2;
-  rtx scratch_or_premodify = scratch;
-  rtx and_rtx;
+  rtx new_addr;
+  rtx op_reg, op0, op1;
+  rtx and_op;
   rtx cc_clobber;
+  rtvec rv;
 
-  if (TARGET_DEBUG_ADDR)
-    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
+  if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
+      || !base_reg_operand (scratch, GET_MODE (scratch)))
+    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
 
-  if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+  if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
+    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
+
+  else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
+    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
+
+  else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
+    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
+
+  else
     rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
 
-  if (GET_CODE (mem) != MEM)
+  /* Make sure the mode is valid in this register class.  */
+  if ((addr_mask & RELOAD_REG_VALID) == 0)
     rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
 
-  rclass = REGNO_REG_CLASS (regno);
-  addr = find_replacement (&XEXP (mem, 0));
+  if (TARGET_DEBUG_ADDR)
+    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
 
-  switch (rclass)
+  new_addr = addr = XEXP (mem, 0);
+  switch (GET_CODE (addr))
     {
-      /* GPRs can handle reg + small constant, all other addresses need to use
-	 the scratch register.  */
-    case GENERAL_REGS:
-    case BASE_REGS:
-      if (GET_CODE (addr) == AND)
+      /* Does the register class support auto update forms for this mode?  If
+	 not, do the update now.  We don't need a scratch register, since the
+	 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
+    case PRE_INC:
+    case PRE_DEC:
+      op_reg = XEXP (addr, 0);
+      if (!base_reg_operand (op_reg, Pmode))
+	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+
+      if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
 	{
-	  and_op2 = XEXP (addr, 1);
-	  addr = find_replacement (&XEXP (addr, 0));
+	  emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
+	  new_addr = op_reg;
 	}
+      break;
 
-      if (GET_CODE (addr) == PRE_MODIFY)
-	{
-	  scratch_or_premodify = find_replacement (&XEXP (addr, 0));
-	  if (!REG_P (scratch_or_premodify))
-	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+    case PRE_MODIFY:
+      op0 = XEXP (addr, 0);
+      op1 = XEXP (addr, 1);
+      if (!base_reg_operand (op0, Pmode)
+	  || GET_CODE (op1) != PLUS
+	  || !rtx_equal_p (op0, XEXP (op1, 0)))
+	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
 
-	  addr = find_replacement (&XEXP (addr, 1));
-	  if (GET_CODE (addr) != PLUS)
-	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+      if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
+	{
+	  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+	  new_addr = reg;
 	}
+      break;
 
-      if (GET_CODE (addr) == PLUS
-	  && (and_op2 != NULL_RTX
-	      || !rs6000_legitimate_offset_address_p (PTImode, addr,
-						      false, true)))
+      /* Do we need to simulate AND -16 to clear the bottom address bits used
+	 in VMX load/stores?  */
+    case AND:
+      op0 = XEXP (addr, 0);
+      op1 = XEXP (addr, 1);
+      if ((addr_mask & RELOAD_REG_AND_M16) == 0)
 	{
-	  /* find_replacement already recurses into both operands of
-	     PLUS so we don't need to call it here.  */
-	  addr_op1 = XEXP (addr, 0);
-	  addr_op2 = XEXP (addr, 1);
-	  if (!legitimate_indirect_address_p (addr_op1, false))
-	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+	  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
+	    op_reg = op0;
 
-	  if (!REG_P (addr_op2)
-	      && (GET_CODE (addr_op2) != CONST_INT
-		  || !satisfies_constraint_I (addr_op2)))
+	  else if (GET_CODE (op1) == PLUS)
 	    {
-	      if (TARGET_DEBUG_ADDR)
-		{
-		  fprintf (stderr,
-			   "\nMove plus addr to register %s, mode = %s: ",
-			   rs6000_reg_names[REGNO (scratch)],
-			   GET_MODE_NAME (mode));
-		  debug_rtx (addr_op2);
-		}
-	      rs6000_emit_move (scratch, addr_op2, Pmode);
-	      addr_op2 = scratch;
+	      emit_insn (gen_rtx_SET (VOIDmode, scratch, op1));
+	      op_reg = scratch;
 	    }
 
-	  emit_insn (gen_rtx_SET (VOIDmode,
-				  scratch_or_premodify,
-				  gen_rtx_PLUS (Pmode,
-						addr_op1,
-						addr_op2)));
+	  else
+	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
 
-	  addr = scratch_or_premodify;
-	  scratch_or_premodify = scratch;
-	}
-      else if (!legitimate_indirect_address_p (addr, false)
-	       && !rs6000_legitimate_offset_address_p (PTImode, addr,
-						       false, true))
-	{
-	  if (TARGET_DEBUG_ADDR)
-	    {
-	      fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
-		       rs6000_reg_names[REGNO (scratch_or_premodify)],
-		       GET_MODE_NAME (mode));
-	      debug_rtx (addr);
-	    }
-	  rs6000_emit_move (scratch_or_premodify, addr, Pmode);
-	  addr = scratch_or_premodify;
-	  scratch_or_premodify = scratch;
+	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
+	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
+	  rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber);
+	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
+	  new_addr = scratch;
 	}
       break;
 
-      /* Float registers can do offset+reg addressing for scalar types.  */
-    case FLOAT_REGS:
-      if (legitimate_indirect_address_p (addr, false)	/* reg */
-	  || legitimate_indexed_address_p (addr, false)	/* reg+reg */
-	  || ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
-	      && and_op2 == NULL_RTX
-	      && scratch_or_premodify == scratch
-	      && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
-	break;
-
-      /* If this isn't a legacy floating point load/store, fall through to the
-	 VSX defaults.  */
-
-      /* VSX/Altivec registers can only handle reg+reg addressing.  Move other
-	 addresses into a scratch register.  */
-    case VSX_REGS:
-    case ALTIVEC_REGS:
-
-      /* With float regs, we need to handle the AND ourselves, since we can't
-	 use the Altivec instruction with an implicit AND -16.  Allow scalar
-	 loads to float registers to use reg+offset even if VSX.  */
-      if (GET_CODE (addr) == AND
-	  && (rclass != ALTIVEC_REGS || GET_MODE_SIZE (mode) != 16
-	      || GET_CODE (XEXP (addr, 1)) != CONST_INT
-	      || INTVAL (XEXP (addr, 1)) != -16
-	      || !VECTOR_MEM_ALTIVEC_P (mode)))
-	{
-	  and_op2 = XEXP (addr, 1);
-	  addr = find_replacement (&XEXP (addr, 0));
-	}
-
-      /* If we aren't using a VSX load, save the PRE_MODIFY register and use it
-	 as the address later.  */
-      if (GET_CODE (addr) == PRE_MODIFY
-	  && ((ALTIVEC_OR_VSX_VECTOR_MODE (mode)
-	       && (rclass != FLOAT_REGS
-		   || (GET_MODE_SIZE (mode) != 4 && GET_MODE_SIZE (mode) != 8)))
-	      || and_op2 != NULL_RTX
-	      || !legitimate_indexed_address_p (XEXP (addr, 1), false)))
-	{
-	  scratch_or_premodify = find_replacement (&XEXP (addr, 0));
-	  if (!legitimate_indirect_address_p (scratch_or_premodify, false))
-	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
-
-	  addr = find_replacement (&XEXP (addr, 1));
-	  if (GET_CODE (addr) != PLUS)
-	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+      /* If this is an indirect address, make sure it is a base register.  */
+    case REG:
+    case SUBREG:
+      if (!base_reg_operand (addr, GET_MODE (addr)))
+	{
+	  emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+	  new_addr = scratch;
 	}
+      break;
 
-      if (legitimate_indirect_address_p (addr, false)	/* reg */
-	  || legitimate_indexed_address_p (addr, false)	/* reg+reg */
-	  || (GET_CODE (addr) == AND			/* Altivec memory */
-	      && rclass == ALTIVEC_REGS
-	      && GET_CODE (XEXP (addr, 1)) == CONST_INT
-	      && INTVAL (XEXP (addr, 1)) == -16
-	      && (legitimate_indirect_address_p (XEXP (addr, 0), false)
-		  || legitimate_indexed_address_p (XEXP (addr, 0), false))))
-	;
+      /* If this is an indexed address, make sure the register class can handle
+	 indexed addresses for this mode.  */
+    case PLUS:
+      op0 = XEXP (addr, 0);
+      op1 = XEXP (addr, 1);
+      if (!base_reg_operand (op0, Pmode))
+	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
 
-      else if (GET_CODE (addr) == PLUS)
+      else if (int_reg_operand (op1, Pmode))
 	{
-	  addr_op1 = XEXP (addr, 0);
-	  addr_op2 = XEXP (addr, 1);
-	  if (!REG_P (addr_op1))
-	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
-
-	  if (TARGET_DEBUG_ADDR)
+	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
 	    {
-	      fprintf (stderr, "\nMove plus addr to register %s, mode = %s: ",
-		       rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
-	      debug_rtx (addr_op2);
+	      emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+	      new_addr = scratch;
 	    }
-	  rs6000_emit_move (scratch, addr_op2, Pmode);
-	  emit_insn (gen_rtx_SET (VOIDmode,
-				  scratch_or_premodify,
-				  gen_rtx_PLUS (Pmode,
-						addr_op1,
-						scratch)));
-	  addr = scratch_or_premodify;
-	  scratch_or_premodify = scratch;
 	}
 
-      else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
-	       || GET_CODE (addr) == CONST_INT || GET_CODE (addr) == LO_SUM
-	       || REG_P (addr))
+      /* Make sure the register class can handle offset addresses.  */
+      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
 	{
-	  if (TARGET_DEBUG_ADDR)
+	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
 	    {
-	      fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
-		       rs6000_reg_names[REGNO (scratch_or_premodify)],
-		       GET_MODE_NAME (mode));
-	      debug_rtx (addr);
+	      emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+	      new_addr = scratch;
 	    }
-
-	  rs6000_emit_move (scratch_or_premodify, addr, Pmode);
-	  addr = scratch_or_premodify;
-	  scratch_or_premodify = scratch;
 	}
 
       else
@@ -16984,55 +17313,58 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
 
       break;
 
-    default:
-      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
-    }
-
-  /* If the original address involved a pre-modify that we couldn't use the VSX
-     memory instruction with update, and we haven't taken care of already,
-     store the address in the pre-modify register and use that as the
-     address.  */
-  if (scratch_or_premodify != scratch && scratch_or_premodify != addr)
-    {
-      emit_insn (gen_rtx_SET (VOIDmode, scratch_or_premodify, addr));
-      addr = scratch_or_premodify;
-    }
+    case LO_SUM:
+      op0 = XEXP (addr, 0);
+      op1 = XEXP (addr, 1);
+      if (!base_reg_operand (op0, Pmode))
+	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
 
-  /* If the original address involved an AND -16 and we couldn't use an ALTIVEC
-     memory instruction, recreate the AND now, including the clobber which is
-     generated by the general ANDSI3/ANDDI3 patterns for the
-     andi. instruction.  */
-  if (and_op2 != NULL_RTX)
-    {
-      if (! legitimate_indirect_address_p (addr, false))
+      else if (int_reg_operand (op1, Pmode))
 	{
-	  emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
-	  addr = scratch;
+	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
+	    {
+	      emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+	      new_addr = scratch;
+	    }
 	}
 
-      if (TARGET_DEBUG_ADDR)
+      /* Make sure the register class can handle offset addresses.  */
+      else if (legitimate_lo_sum_address_p (mode, addr, false))
 	{
-	  fprintf (stderr, "\nAnd addr to register %s, mode = %s: ",
-		   rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
-	  debug_rtx (and_op2);
+	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
+	    {
+	      emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
+	      new_addr = scratch;
+	    }
 	}
 
-      and_rtx = gen_rtx_SET (VOIDmode,
-			     scratch,
-			     gen_rtx_AND (Pmode,
-					  addr,
-					  and_op2));
+      else
+	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
 
-      cc_clobber = gen_rtx_CLOBBER (CCmode, gen_rtx_SCRATCH (CCmode));
-      emit_insn (gen_rtx_PARALLEL (VOIDmode,
-				   gen_rtvec (2, and_rtx, cc_clobber)));
-      addr = scratch;
+      break;
+
+    case SYMBOL_REF:
+    case CONST:
+    case LABEL_REF:
+#if 0
+      if (TARGET_TOC)
+	emit_insn (gen_rtx_SET (VOIDmode, scratch,
+				create_TOC_reference (addr, scratch)));
+      else
+#endif
+	rs6000_emit_move (scratch, addr, Pmode);
+
+      new_addr = scratch;
+      break;
+
+    default:
+      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
     }
 
   /* Adjust the address if it changed.  */
-  if (addr != XEXP (mem, 0))
+  if (addr != new_addr)
     {
-      mem = replace_equiv_address_nv (mem, addr);
+      mem = replace_equiv_address_nv (mem, new_addr);
       if (TARGET_DEBUG_ADDR)
 	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
     }
@@ -17176,44 +17508,36 @@ rs6000_instantiate_decls (void)
 static enum reg_class
 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
 {
-  enum machine_mode mode = GET_MODE (x);
+  machine_mode mode = GET_MODE (x);
+  bool is_constant = CONSTANT_P (x);
 
-  if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
-    return rclass;
-
-  if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
-      && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
-      && easy_vector_constant (x, mode))
-    return ALTIVEC_REGS;
-
-  if ((CONSTANT_P (x) || GET_CODE (x) == PLUS))
+  /* Do VSX tests before handling traditional floating point registers.  */
+  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
     {
-      if (reg_class_subset_p (GENERAL_REGS, rclass))
-	return GENERAL_REGS;
-      if (reg_class_subset_p (BASE_REGS, rclass))
-	return BASE_REGS;
-      return NO_REGS;
-    }
+      if (is_constant)
+	{
+	  /* Zero is always allowed in all VSX registers.  */
+	  if (x == CONST0_RTX (mode))
+	    return rclass;
 
-  if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
-    return GENERAL_REGS;
+	  /* If this is a vector constant that can be formed with a few Altivec
+	     instructions, we want altivec registers.  */
+	  if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
+	    return ALTIVEC_REGS;
 
-  /* For VSX, prefer the traditional registers for 64-bit values because we can
-     use the non-VSX loads.  Prefer the Altivec registers if Altivec is
-     handling the vector operations (i.e. V16QI, V8HI, and V4SI), or if we
-     prefer Altivec loads..  */
-  if (rclass == VSX_REGS)
-    {
-      if (MEM_P (x) && reg_addr[mode].scalar_in_vmx_p)
-	{
-	  rtx addr = XEXP (x, 0);
-	  if (rs6000_legitimate_offset_address_p (mode, addr, false, true)
-	      || legitimate_lo_sum_address_p (mode, addr, false))
-	    return FLOAT_REGS;
+	  /* Force constant to memory.  */
+	  return NO_REGS;
 	}
-      else if (GET_MODE_SIZE (mode) <= 8 && !reg_addr[mode].scalar_in_vmx_p)
+
+      /* If this is a scalar floating point value, prefer the traditional
+	 floating point registers so that we can use D-form (register+offset)
+	 addressing.  */
+      if (GET_MODE_SIZE (mode) < 16)
 	return FLOAT_REGS;
 
+      /* Prefer the Altivec registers if Altivec is handling the vector
+	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
+	 loads.  */
       if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
 	  || mode == V1TImode)
 	return ALTIVEC_REGS;
@@ -17221,6 +17545,18 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
       return rclass;
     }
 
+  if (is_constant || GET_CODE (x) == PLUS)
+    {
+      if (reg_class_subset_p (GENERAL_REGS, rclass))
+	return GENERAL_REGS;
+      if (reg_class_subset_p (BASE_REGS, rclass))
+	return BASE_REGS;
+      return NO_REGS;
+    }
+
+  if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
+    return GENERAL_REGS;
+
   return rclass;
 }
 
@@ -17340,30 +17676,34 @@ rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
   else
     regno = -1;
 
+  /* If we have VSX register moves, prefer moving scalar values between
+     Altivec registers and GPR by going via an FPR (and then via memory)
+     instead of reloading the secondary memory address for Altivec moves.  */
+  if (TARGET_VSX
+      && GET_MODE_SIZE (mode) < 16
+      && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
+           && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
+          || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
+              && (regno >= 0 && INT_REGNO_P (regno)))))
+    return FLOAT_REGS;
+
   /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
      into anything.  */
   if (rclass == GENERAL_REGS || rclass == BASE_REGS
       || (regno >= 0 && INT_REGNO_P (regno)))
     return NO_REGS;
 
+  /* Constants, memory, and VSX registers can go into VSX registers (both the
+     traditional floating point and the altivec registers).  */
+  if (rclass == VSX_REGS
+      && (regno == -1 || VSX_REGNO_P (regno)))
+    return NO_REGS;
+
   /* Constants, memory, and FP registers can go into FP registers.  */
   if ((regno == -1 || FP_REGNO_P (regno))
       && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
     return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
 
-  /* Memory, and FP/altivec registers can go into fp/altivec registers under
-     VSX.  However, for scalar variables, use the traditional floating point
-     registers so that we can use offset+register addressing.  */
-  if (TARGET_VSX
-      && (regno == -1 || VSX_REGNO_P (regno))
-      && VSX_REG_CLASS_P (rclass))
-    {
-      if (GET_MODE_SIZE (mode) < 16)
-	return FLOAT_REGS;
-
-      return NO_REGS;
-    }
-
   /* Memory, and AltiVec registers can go into AltiVec registers.  */
   if ((regno == -1 || ALTIVEC_REGNO_P (regno))
       && rclass == ALTIVEC_REGS)
@@ -18481,7 +18821,7 @@ print_operand (FILE *file, rtx x, int code)
 	  fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
 	else
 	  {
-	    if (!GET_CODE (tmp) == PLUS
+	    if (GET_CODE (tmp) != PLUS
 		|| !REG_P (XEXP (tmp, 0))
 		|| !REG_P (XEXP (tmp, 1)))
 	      {
@@ -20000,7 +20340,7 @@ rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
   shift = gen_reg_rtx (SImode);
   addr = gen_lowpart (SImode, addr);
   emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
-  if (WORDS_BIG_ENDIAN)
+  if (BYTES_BIG_ENDIAN)
     shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
 			         shift, 1, OPTAB_LIB_WIDEN);
   *pshift = shift;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index f77754aa110..fff13f06000 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -370,6 +370,9 @@
 ; SF/DF constraint for arithmetic on VSX registers
 (define_mode_attr Fv		[(SF "wy") (DF "ws")])
 
+; SF/DF constraint for arithmetic on altivec registers
+(define_mode_attr Fa		[(SF "wu") (DF "wv")])
+
 ; s/d suffix for things like fp_addsub_s/fp_addsub_d
 (define_mode_attr Fs		[(SF "s")  (DF "d")])
 
@@ -515,6 +518,12 @@
 (define_mode_attr idiv_ldiv [(SI "idiv")
 			     (DI "ldiv")])
 
+
+;; Reload iterator for creating the function to allocate a base register to
+;; supplement addressing modes.
+(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
+			      SF SD SI DF DD DI TI PTI])
+
 
 ;; Start with fixed-point load and store insns.  Here we put only the more
 ;; complex forms.  Basic data transfer is done later.
@@ -5264,7 +5273,7 @@
   "")
 
 (define_insn_and_split "*extendsfdf2_fpr"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wv")
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d,d,ws,?ws,wu")
 	(float_extend:DF (match_operand:SF 1 "reg_or_mem_operand" "0,f,m,0,wy,Z")))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
   "@
@@ -5387,7 +5396,7 @@
   "TARGET_<MODE>_FPR && TARGET_CMPB"
   "@
    fcpsgn %0,%2,%1
-   xscpsgn<Fvsx> %x0,%x2,%x1"
+   xscpsgndp %x0,%x2,%x1"
   [(set_attr "type" "fp")])
 
 ;; For MIN, MAX, and conditional move, we use DEFINE_EXPAND's that involve a
@@ -5659,9 +5668,9 @@
 ; not be needed and also in case the insns are deleted as dead code.
 
 (define_insn_and_split "floatsi<mode>2_lfiwax"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Fv>")
 	(float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r")))
-   (clobber (match_scratch:DI 2 "=d"))]
+   (clobber (match_scratch:DI 2 "=wj"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX
    && <SI_CONVERT_FP> && can_create_pseudo_p ()"
   "#"
@@ -5700,7 +5709,7 @@
    (set_attr "type" "fpload")])
 
 (define_insn_and_split "floatsi<mode>2_lfiwax_mem"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,<rreg2>")
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fa>")
 	(float:SFDF
 	 (sign_extend:DI
 	  (match_operand:SI 1 "memory_operand" "Z,Z"))))
@@ -5734,9 +5743,9 @@
   [(set_attr "type" "fpload,fpload,mftgpr")])
 
 (define_insn_and_split "floatunssi<mode>2_lfiwzx"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Fv>")
 	(unsigned_float:SFDF (match_operand:SI 1 "nonimmediate_operand" "r")))
-   (clobber (match_scratch:DI 2 "=d"))]
+   (clobber (match_scratch:DI 2 "=wj"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX
    && <SI_CONVERT_FP>"
   "#"
@@ -5775,7 +5784,7 @@
    (set_attr "type" "fpload")])
 
 (define_insn_and_split "floatunssi<mode>2_lfiwzx_mem"
-  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,<rreg2>")
+  [(set (match_operand:SFDF 0 "gpc_reg_operand" "=<Ff>,<Fa>")
 	(unsigned_float:SFDF
 	 (zero_extend:DI
 	  (match_operand:SI 1 "memory_operand" "Z,Z"))))
@@ -6088,11 +6097,13 @@
   "")
 
 (define_insn "*fix_trunc<mode>di2_fctidz"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
-	(fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "d")))]
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi")
+	(fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fa>")))]
   "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS
-    && TARGET_FCFID && !VECTOR_UNIT_VSX_P (<MODE>mode)"
-  "fctidz %0,%1"
+    && TARGET_FCFID"
+  "@
+   fctidz %0,%1
+   xscvdpsxds %x0,%x1"
   [(set_attr "type" "fp")])
 
 (define_expand "fixuns_trunc<mode>si2"
@@ -6158,11 +6169,13 @@
   "")
 
 (define_insn "*fixuns_trunc<mode>di2_fctiduz"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
-	(unsigned_fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "d")))]
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi")
+	(unsigned_fix:DI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fa>")))]
   "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS
-    && TARGET_FCTIDUZ && !VECTOR_UNIT_VSX_P (<MODE>mode)"
-  "fctiduz %0,%1"
+    && TARGET_FCTIDUZ"
+  "@
+   fctiduz %0,%1
+   xscvdpuxds %x0,%x1"
   [(set_attr "type" "fp")])
 
 ; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ))
@@ -6170,32 +6183,37 @@
 ; because the first makes it clear that operand 0 is not live
 ; before the instruction.
 (define_insn "fctiwz_<mode>"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
-	(unspec:DI [(fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d"))]
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi")
+	(unspec:DI [(fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))]
 		   UNSPEC_FCTIWZ))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
-  "fctiwz %0,%1"
+  "@
+   fctiwz %0,%1
+   xscvdpsxws %x0,%x1"
   [(set_attr "type" "fp")])
 
 (define_insn "fctiwuz_<mode>"
-  [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi")
 	(unspec:DI [(unsigned_fix:SI
-		     (match_operand:SFDF 1 "gpc_reg_operand" "<rreg2>"))]
+		     (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))]
 		   UNSPEC_FCTIWUZ))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ"
-  "fctiwuz %0,%1"
+  "@
+   fctiwuz %0,%1
+   xscvdpuxws %x0,%x1"
   [(set_attr "type" "fp")])
 
 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
 ;; since the friz instruction does not truncate the value if the floating
 ;; point value is < LONG_MIN or > LONG_MAX.
 (define_insn "*friz"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
-	(float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d"))))]
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws")
+	(float:DF (fix:DI (match_operand:DF 1 "gpc_reg_operand" "d,ws"))))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_FPRND
-   && !VECTOR_UNIT_VSX_P (DFmode) && flag_unsafe_math_optimizations
-   && !flag_trapping_math && TARGET_FRIZ"
-  "friz %0,%1"
+   && flag_unsafe_math_optimizations && !flag_trapping_math && TARGET_FRIZ"
+  "@
+   friz %0,%1
+   xsrdpiz %x0,%x1"
   [(set_attr "type" "fp")])
 
 ;; Since FCTIWZ doesn't sign extend the upper bits, we have to do a store and a
@@ -6378,11 +6396,12 @@
   "")
 
 (define_insn "*floatdidf2_fpr"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
-	(float:DF (match_operand:DI 1 "gpc_reg_operand" "d")))]
-  "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS
-   && !VECTOR_UNIT_VSX_P (DFmode)"
-  "fcfid %0,%1"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws")
+	(float:DF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))]
+  "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS"
+  "@
+   fcfid %0,%1
+   xscvsxddp %x0,%x1"
   [(set_attr "type" "fp")])
 
 ; Allow the combiner to merge source memory operands to the conversion so that
@@ -6391,9 +6410,9 @@
 ; hit.  We will split after reload to avoid the trip through the GPRs
 
 (define_insn_and_split "*floatdidf2_mem"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
-	(float:DF (match_operand:DI 1 "memory_operand" "m")))
-   (clobber (match_scratch:DI 2 "=d"))]
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws")
+	(float:DF (match_operand:DI 1 "memory_operand" "m,Z")))
+   (clobber (match_scratch:DI 2 "=d,wi"))]
   "TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && TARGET_FPRS && TARGET_FCFID"
   "#"
   "&& reload_completed"
@@ -6407,21 +6426,23 @@
   [(set (match_operand:DF 0 "gpc_reg_operand" "")
 	(unsigned_float:DF
 	 (match_operand:DI 1 "gpc_reg_operand" "")))]
-  "TARGET_HARD_FLOAT && (TARGET_FCFIDU || VECTOR_UNIT_VSX_P (DFmode))"
+  "TARGET_HARD_FLOAT && TARGET_FCFIDU"
   "")
 
 (define_insn "*floatunsdidf2_fcfidu"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
-	(unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "d")))]
-  "TARGET_HARD_FLOAT && TARGET_FCFIDU && !VECTOR_UNIT_VSX_P (DFmode)"
-  "fcfidu %0,%1"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws")
+	(unsigned_float:DF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))]
+  "TARGET_HARD_FLOAT && TARGET_FCFIDU"
+  "@
+   fcfidu %0,%1
+   xscvuxddp %x0,%x1"
   [(set_attr "type" "fp")
    (set_attr "length" "4")])
 
 (define_insn_and_split "*floatunsdidf2_mem"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
-	(unsigned_float:DF (match_operand:DI 1 "memory_operand" "m")))
-   (clobber (match_scratch:DI 2 "=d"))]
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d,ws")
+	(unsigned_float:DF (match_operand:DI 1 "memory_operand" "m,Z")))
+   (clobber (match_scratch:DI 2 "=d,wi"))]
   "TARGET_HARD_FLOAT && (TARGET_FCFIDU || VECTOR_UNIT_VSX_P (DFmode))"
   "#"
   "&& reload_completed"
@@ -6454,17 +6475,19 @@
 }")
 
 (define_insn "floatdisf2_fcfids"
-  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
-	(float:SF (match_operand:DI 1 "gpc_reg_operand" "d")))]
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy")
+	(float:SF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
    && TARGET_DOUBLE_FLOAT && TARGET_FCFIDS"
-  "fcfids %0,%1"
+  "@
+   fcfids %0,%1
+   xscvsxdsp %x0,%x1"
   [(set_attr "type" "fp")])
 
 (define_insn_and_split "*floatdisf2_mem"
-  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
-	(float:SF (match_operand:DI 1 "memory_operand" "m")))
-   (clobber (match_scratch:DI 2 "=f"))]
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy,wy")
+	(float:SF (match_operand:DI 1 "memory_operand" "m,m,Z")))
+   (clobber (match_scratch:DI 2 "=d,d,wi"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
    && TARGET_DOUBLE_FLOAT && TARGET_FCFIDS"
   "#"
@@ -6486,7 +6509,8 @@
   [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
         (float:SF (match_operand:DI 1 "gpc_reg_operand" "d")))
    (clobber (match_scratch:DF 2 "=d"))]
-  "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+  "TARGET_FCFID && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+   && !TARGET_FCFIDS"
   "#"
   "&& reload_completed"
   [(set (match_dup 2)
@@ -6522,7 +6546,8 @@
 			   (label_ref (match_operand:DI 2 "" ""))
 			   (pc)))
    (set (match_dup 0) (match_dup 1))]
-  "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+  "TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+   && !TARGET_FCFIDS"
   "
 {
   operands[3] = gen_reg_rtx (DImode);
@@ -6537,17 +6562,19 @@
   "")
 
 (define_insn "floatunsdisf2_fcfidus"
-  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
-        (unsigned_float:SF (match_operand:DI 1 "gpc_reg_operand" "d")))]
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wu")
+        (unsigned_float:SF (match_operand:DI 1 "gpc_reg_operand" "d,wi")))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
    && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS"
-  "fcfidus %0,%1"
+  "@
+   fcfidus %0,%1
+   xscvuxdsp %x0,%x1"
   [(set_attr "type" "fp")])
 
 (define_insn_and_split "*floatunsdisf2_mem"
-  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
-	(unsigned_float:SF (match_operand:DI 1 "memory_operand" "m")))
-   (clobber (match_scratch:DI 2 "=f"))]
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy,wy")
+	(unsigned_float:SF (match_operand:DI 1 "memory_operand" "m,m,Z")))
+   (clobber (match_scratch:DI 2 "=d,d,wi"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
    && TARGET_DOUBLE_FLOAT && TARGET_FCFIDUS"
   "#"
@@ -9207,7 +9234,7 @@
 
 (define_insn "mov<mode>_hardfloat"
   [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,<f32_vsx>,<f32_vsx>,<f32_lr>,<f32_sm>,<f32_av>,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
-	(match_operand:FMOVE32 1 "input_operand" "r,m,r,f,<f32_vsx>,j,<f32_lm>,<f32_sr>,Z,<f32_av>,r,<f32_dm>,r, h, 0, G,Fn"))]
+	(match_operand:FMOVE32 1 "input_operand" "r,m,r,f,<f32_vsx>,j,<f32_lm>,<f32_sr>,Z,<f32_av>,r,<f32_dm>,r,h,0,G,Fn"))]
   "(gpc_reg_operand (operands[0], <MODE>mode)
    || gpc_reg_operand (operands[1], <MODE>mode))
    && (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
@@ -9611,8 +9638,8 @@
 ;; problematical.  Don't allow direct move for this case.
 
 (define_insn_and_split "*mov<mode>_64bit_dm"
-  [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r,r,wm")
-	(match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r,wm,r"))]
+  [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm")
+	(match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jYGHF,r,wm,r"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64
    && (<MODE>mode != TDmode || WORDS_BIG_ENDIAN)
    && (gpc_reg_operand (operands[0], <MODE>mode)
@@ -9621,11 +9648,11 @@
   "&& reload_completed"
   [(pc)]
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
-  [(set_attr "length" "8,8,8,12,12,8,8,8")])
+  [(set_attr "length" "8,8,8,8,12,12,8,8,8")])
 
 (define_insn_and_split "*movtd_64bit_nodm"
-  [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
-	(match_operand:TD 1 "input_operand" "d,m,d,r,YGHF,r"))]
+  [(set (match_operand:TD 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r")
+	(match_operand:TD 1 "input_operand" "d,m,d,j,r,jYGHF,r"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64 && !WORDS_BIG_ENDIAN
    && (gpc_reg_operand (operands[0], TDmode)
        || gpc_reg_operand (operands[1], TDmode))"
@@ -9633,11 +9660,11 @@
   "&& reload_completed"
   [(pc)]
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
-  [(set_attr "length" "8,8,8,12,12,8")])
+  [(set_attr "length" "8,8,8,8,12,12,8")])
 
 (define_insn_and_split "*mov<mode>_32bit"
-  [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,Y,r,r")
-	(match_operand:FMOVE128 1 "input_operand" "d,m,d,r,YGHF,r"))]
+  [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r")
+	(match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jYGHF,r"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -9645,7 +9672,7 @@
   "&& reload_completed"
   [(pc)]
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
-  [(set_attr "length" "8,8,8,20,20,16")])
+  [(set_attr "length" "8,8,8,8,20,20,16")])
 
 (define_insn_and_split "*mov<mode>_softfloat"
   [(set (match_operand:FMOVE128 0 "rs6000_nonimmediate_operand" "=Y,r,r")
@@ -9659,6 +9686,21 @@
 { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }
   [(set_attr "length" "20,20,16")])
 
+;; If we are using -ffast-math, easy_fp_constant assumes all constants are
+;; 'easy' in order to allow for reciprocal estimation.  Make sure the constant
+;; is in the constant pool before reload occurs.  This simplifies accessing
+;; scalars in the traditional Altivec registers.
+
+(define_split
+  [(set (match_operand:SFDF 0 "register_operand" "")
+	(match_operand:SFDF 1 "memory_fp_constant" ""))]
+  "TARGET_<MODE>_FPR && flag_unsafe_math_optimizations
+   && !reload_in_progress && !reload_completed && !lra_in_progress"
+  [(set (match_dup 0) (match_dup 2))]
+{
+  operands[2] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
+})
+
 (define_expand "extenddftf2"
   [(set (match_operand:TF 0 "nonimmediate_operand" "")
 	(float_extend:TF (match_operand:DF 1 "input_operand" "")))]
@@ -9964,6 +10006,50 @@
 })
 
 
+;; Reload patterns for various types using the vector registers.  We may need
+;; an additional base register to convert the reg+offset addressing to reg+reg
+;; for vector registers and reg+reg or (reg+reg)&(-16) addressing to just an
+;; index register for gpr registers.
+(define_expand "reload_<RELOAD:mode>_<P:mptrsize>_store"
+  [(parallel [(match_operand:RELOAD 0 "memory_operand" "m")
+              (match_operand:RELOAD 1 "gpc_reg_operand" "wa")
+              (match_operand:P 2 "register_operand" "=b")])]
+  "<P:tptrsize>"
+{
+  rs6000_secondary_reload_inner (operands[1], operands[0], operands[2], true);
+  DONE;
+})
+
+(define_expand "reload_<RELOAD:mode>_<P:mptrsize>_load"
+  [(parallel [(match_operand:RELOAD 0 "gpc_reg_operand" "wa")
+              (match_operand:RELOAD 1 "memory_operand" "m")
+              (match_operand:P 2 "register_operand" "=b")])]
+  "<P:tptrsize>"
+{
+  rs6000_secondary_reload_inner (operands[0], operands[1], operands[2], false);
+  DONE;
+})
+
+
+;; Reload sometimes tries to move the address to a GPR, and can generate
+;; invalid RTL for addresses involving AND -16.  Allow addresses involving
+;; reg+reg, reg+small constant, or just reg, all wrapped in an AND -16.
+
+(define_insn_and_split "*vec_reload_and_plus_<mptrsize>"
+  [(set (match_operand:P 0 "gpc_reg_operand" "=b")
+	(and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r")
+		       (match_operand:P 2 "reg_or_cint_operand" "rI"))
+	       (const_int -16)))]
+  "TARGET_ALTIVEC && (reload_in_progress || reload_completed)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(plus:P (match_dup 1)
+		(match_dup 2)))
+   (set (match_dup 0)
+	(and:P (match_dup 0)
+	       (const_int -16)))])
+
 ;; Power8 merge instructions to allow direct move to/from floating point
 ;; registers in 32-bit mode.  We use TF mode to get two registers to move the
 ;; individual 32-bit parts across.  Subreg doesn't work too well on the TF
@@ -11278,12 +11364,15 @@
 ;; sequences, using get_attr_length here will smash the operands
 ;; array.  Neither is there an early_cobbler_p predicate.
 ;; Disallow subregs for E500 so we don't munge frob_di_df_2.
+;; Also this optimization interferes with scalars going into
+;; altivec registers (the code does reloading through the FPRs).
 (define_peephole2
   [(set (match_operand:DF 0 "gpc_reg_operand" "")
 	(match_operand:DF 1 "any_operand" ""))
    (set (match_operand:DF 2 "gpc_reg_operand" "")
 	(match_dup 0))]
   "!(TARGET_E500_DOUBLE && GET_CODE (operands[2]) == SUBREG)
+   && !TARGET_UPPER_REGS_DF
    && peep2_reg_dead_p (2, operands[0])"
   [(set (match_dup 2) (match_dup 1))])
 
@@ -11292,7 +11381,8 @@
 	(match_operand:SF 1 "any_operand" ""))
    (set (match_operand:SF 2 "gpc_reg_operand" "")
 	(match_dup 0))]
-  "peep2_reg_dead_p (2, operands[0])"
+  "!TARGET_UPPER_REGS_SF
+   && peep2_reg_dead_p (2, operands[0])"
   [(set (match_dup 2) (match_dup 1))])
 
 
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 4d0d5e73db1..eb3e3237935 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -582,12 +582,16 @@ Target Report Var(rs6000_compat_align_parm) Init(0) Save
 Generate aggregate parameter passing code with at most 64-bit alignment.
 
 mupper-regs-df
-Target Undocumented Mask(UPPER_REGS_DF) Var(rs6000_isa_flags)
+Target Report Mask(UPPER_REGS_DF) Var(rs6000_isa_flags)
 Allow double variables in upper registers with -mcpu=power7 or -mvsx
 
 mupper-regs-sf
-Target Undocumented Mask(UPPER_REGS_SF) Var(rs6000_isa_flags)
-Allow float variables in upper registers with -mcpu=power8 or -mp8-vector
+Target Report Mask(UPPER_REGS_SF) Var(rs6000_isa_flags)
+Allow float variables in upper registers with -mcpu=power8 or -mpower8-vector
+
+mupper-regs
+Target Report Var(TARGET_UPPER_REGS) Init(-1) Save
+Allow float/double variables in upper registers if cpu allows it
 
 moptimize-swaps
 Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index edbb83161d1..e0bf6c4f5e4 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -53,10 +53,6 @@
 ;; Vector modes for 64-bit base types
 (define_mode_iterator VEC_64 [V2DI V2DF])
 
-;; Vector reload iterator
-(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
-			     SF SD SI DF DD DI TI])
-
 ;; Base type from vector mode
 (define_mode_attr VEC_base [(V16QI "QI")
 			    (V8HI  "HI")
@@ -183,66 +179,6 @@
     }
 }")
 
-
-
-;; Reload patterns for vector operations.  We may need an additional base
-;; register to convert the reg+offset addressing to reg+reg for vector
-;; registers and reg+reg or (reg+reg)&(-16) addressing to just an index
-;; register for gpr registers.
-(define_expand "reload_<VEC_R:mode>_<P:mptrsize>_store"
-  [(parallel [(match_operand:VEC_R 0 "memory_operand" "m")
-              (match_operand:VEC_R 1 "gpc_reg_operand" "r")
-              (match_operand:P 2 "register_operand" "=&b")])]
-  "<P:tptrsize>"
-{
-  rs6000_secondary_reload_inner (operands[1], operands[0], operands[2], true);
-  DONE;
-})
-
-(define_expand "reload_<VEC_R:mode>_<P:mptrsize>_load"
-  [(parallel [(match_operand:VEC_R 0 "gpc_reg_operand" "=&r")
-              (match_operand:VEC_R 1 "memory_operand" "m")
-              (match_operand:P 2 "register_operand" "=&b")])]
-  "<P:tptrsize>"
-{
-  rs6000_secondary_reload_inner (operands[0], operands[1], operands[2], false);
-  DONE;
-})
-
-;; Reload sometimes tries to move the address to a GPR, and can generate
-;; invalid RTL for addresses involving AND -16.  Allow addresses involving
-;; reg+reg, reg+small constant, or just reg, all wrapped in an AND -16.
-
-(define_insn_and_split "*vec_reload_and_plus_<mptrsize>"
-  [(set (match_operand:P 0 "gpc_reg_operand" "=b")
-	(and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r")
-		       (match_operand:P 2 "reg_or_cint_operand" "rI"))
-	       (const_int -16)))]
-  "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)"
-  "#"
-  "&& reload_completed"
-  [(set (match_dup 0)
-	(plus:P (match_dup 1)
-		(match_dup 2)))
-   (parallel [(set (match_dup 0)
-		   (and:P (match_dup 0)
-			  (const_int -16)))
-	      (clobber:CC (scratch:CC))])])
-
-;; The normal ANDSI3/ANDDI3 won't match if reload decides to move an AND -16
-;; address to a register because there is no clobber of a (scratch), so we add
-;; it here.
-(define_insn_and_split "*vec_reload_and_reg_<mptrsize>"
-  [(set (match_operand:P 0 "gpc_reg_operand" "=b")
-	(and:P (match_operand:P 1 "gpc_reg_operand" "r")
-	       (const_int -16)))]
-  "(TARGET_ALTIVEC || TARGET_VSX) && (reload_in_progress || reload_completed)"
-  "#"
-  "&& reload_completed"
-  [(parallel [(set (match_dup 0)
-		   (and:P (match_dup 1)
-			  (const_int -16)))
-	      (clobber:CC (scratch:CC))])])
 
 ;; Generic floating point vector arithmetic support
 (define_expand "add<mode>3"
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 9aaf0642804..ae7db4012b4 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1197,24 +1197,24 @@
 ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX.
 ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md.
 (define_insn "vsx_float<VSi><mode>2"
-  [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
-	(float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
+  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
+	(float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
-  "x<VSv>cvsx<VSc><VSs> %x0,%x1"
+  "xvcvsx<VSc><VSs> %x0,%x1"
   [(set_attr "type" "<VStype_simple>")
    (set_attr "fp_type" "<VSfptype_simple>")])
 
 (define_insn "vsx_floatuns<VSi><mode>2"
-  [(set (match_operand:VSX_B 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
-	(unsigned_float:VSX_B (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
+  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>")
+	(unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
-  "x<VSv>cvux<VSc><VSs> %x0,%x1"
+  "xvcvux<VSc><VSs> %x0,%x1"
   [(set_attr "type" "<VStype_simple>")
    (set_attr "fp_type" "<VSfptype_simple>")])
 
 (define_insn "vsx_fix_trunc<mode><VSi>2"
   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
-	(fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
+	(fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "x<VSv>cv<VSs>sx<VSc>s %x0,%x1"
   [(set_attr "type" "<VStype_simple>")
@@ -1222,7 +1222,7 @@
 
 (define_insn "vsx_fixuns_trunc<mode><VSi>2"
   [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>")
-	(unsigned_fix:<VSI> (match_operand:VSX_B 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
+	(unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "x<VSv>cv<VSs>ux<VSc>s %x0,%x1"
   [(set_attr "type" "<VStype_simple>")
@@ -1525,19 +1525,19 @@
   [(set_attr "type" "vecdouble")])
 
 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
-;; since the xsrdpiz instruction does not truncate the value if the floating
+;; the xvrdpiz instruction does not truncate the value if the floating
 ;; point value is < LONG_MIN or > LONG_MAX.
-(define_insn "*vsx_float_fix_<mode>2"
-  [(set (match_operand:VSX_DF 0 "vsx_register_operand" "=<VSr>,?<VSa>")
-	(float:VSX_DF
-	 (fix:<VSI>
-	  (match_operand:VSX_DF 1 "vsx_register_operand" "<VSr>,?<VSa>"))))]
+(define_insn "*vsx_float_fix_v2df2"
+  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
+	(float:V2DF
+	 (fix:V2DI
+	  (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
-   && VECTOR_UNIT_VSX_P (<MODE>mode) && flag_unsafe_math_optimizations
+   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
    && !flag_trapping_math && TARGET_FRIZ"
-  "x<VSv>r<VSs>iz %x0,%x1"
-  [(set_attr "type" "<VStype_simple>")
-   (set_attr "fp_type" "<VSfptype_simple>")])
+  "xvrdpiz %x0,%x1"
+  [(set_attr "type" "vecdouble")
+   (set_attr "fp_type" "fp_addsub_d")])
 
 
 ;; Permute operations
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 7bb83ede7e5..871a4175446 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -923,7 +923,9 @@ See RS/6000 and PowerPC Options.
 -mcrypto -mno-crypto -mdirect-move -mno-direct-move @gol
 -mquad-memory -mno-quad-memory @gol
 -mquad-memory-atomic -mno-quad-memory-atomic @gol
--mcompat-align-parm -mno-compat-align-parm}
+-mcompat-align-parm -mno-compat-align-parm @gol
+-mupper-regs-df -mno-upper-regs-df -mupper-regs-sf -mno-upper-regs-sf @gol
+-mupper-regs -mno-upper-regs}
 
 @emph{RX Options}
 @gccoptlist{-m64bit-doubles  -m32bit-doubles  -fpu  -nofpu@gol
@@ -19169,6 +19171,39 @@ Generate code that uses (does not use) the atomic quad word memory
 instructions.  The @option{-mquad-memory-atomic} option requires use of
 64-bit mode.
 
+@item -mupper-regs-df
+@itemx -mno-upper-regs-df
+@opindex mupper-regs-df
+@opindex mno-upper-regs-df
+Generate code that uses (does not use) the scalar double precision
+instructions that target all 64 registers in the vector/scalar
+floating point register set that were added in version 2.06 of the
+PowerPC ISA.  The @option{-mupper-regs-df} option is enabled by default
+if you use any of the @option{-mcpu=power7}, @option{-mcpu=power8}, or
+@option{-mvsx} options.
+
+@item -mupper-regs-sf
+@itemx -mno-upper-regs-sf
+@opindex mupper-regs-sf
+@opindex mno-upper-regs-sf
+Generate code that uses (does not use) the scalar single precision
+instructions that target all 64 registers in the vector/scalar
+floating point register set that were added in version 2.07 of the
+PowerPC ISA.  The @option{-mupper-regs-sf} option is enabled by default
+if you use either of the @option{-mcpu=power8} or
+@option{-mpower8-vector} options.
+
+@item -mupper-regs
+@itemx -mno-upper-regs
+@opindex mupper-regs
+@opindex mno-upper-regs
+Generate code that uses (does not use) the scalar
+instructions that target all 64 registers in the vector/scalar
+floating point register set, depending on the model of the machine.
+
+If the @option{-mno-upper-regs} option is used, it turns off both the
+@option{-mupper-regs-sf} and @option{-mupper-regs-df} options.
+
 @item -mfloat-gprs=@var{yes/single/double/no}
 @itemx -mfloat-gprs
 @opindex mfloat-gprs
diff --git a/gcc/testsuite/ChangeLog.meissner b/gcc/testsuite/ChangeLog.meissner
new file mode 100644
index 00000000000..a79d0da9920
--- /dev/null
+++ b/gcc/testsuite/ChangeLog.meissner
@@ -0,0 +1,46 @@
+2015-02-06  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	Merge up to ibm/gcc-4_9-branch, subversion id 220484.
+
+2014-12-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	Merge up to ibm/gcc-4_9-branch, subversion id 218646.
+
+2014-12-02  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	Clone branch from at 8.0 branch, subversion id 218285 (FSF
+	subversion id 217046)
+
+[gcc/testsuite, patch #3]
+2014-11-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* gcc.target/powerpc/ppc-fpconv-1.c: Adjust for -mupper-regs-df
+	changes.
+	* gcc.target/powerpc/ppc-fpconv-2.c: Likewise.
+	* gcc.target/powerpc/ppc-fpconv-3.c: Likewise.
+	* gcc.target/powerpc/ppc-fpconv-4.c: Likewise.
+	* gcc.target/powerpc/ppc-fpconv-5.c: Likewise.
+	* gcc.target/powerpc/ppc-fpconv-6.c: Likewise.
+	* gcc.target/powerpc/ppc-fpconv-7.c: Likewise.
+	* gcc.target/powerpc/ppc-fpconv-8.c: Likewise.
+	* gcc.target/powerpc/ppc-fpconv-9.c: Likewise.
+	* gcc.target/powerpc/ppc-fpconv-10.c: Likewise.
+	* gcc.target/powerpc/ppc-round.c: Likewise.
+
+[gcc/testsuite, patch #7]
+2014-11-11  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* gcc.target/powerpc/p8vector-ldst.c: Rewrite to use 40 live
+	floating point variables instead of using asm to test allocating
+	values to the Altivec registers.
+
+	* gcc.target/powerpc/upper-regs-sf.c: New -mupper-regs-sf and
+	-mupper-regs-df tests.
+	* gcc.target/powerpc/upper-regs-df.c: Likewise.
+
+[gcc/testsuite, pr64019 fix]
+2014-12-01  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/64019
+	* gcc.target/powerpc/pr64019.c: New file.
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c b/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c
index 33f19991f76..5da7388097b 100644
--- a/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c
+++ b/gcc/testsuite/gcc.target/powerpc/p8vector-ldst.c
@@ -1,42 +1,624 @@
-/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
 /* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-sf" } */
 
-float load_sf (float *p)
+float
+load_store_sf (unsigned long num,
+	       const float *from_ptr,
+	       float *to_ptr,
+	       const unsigned long *in_mask_ptr,
+	       const unsigned long *out_mask_ptr)
 {
-  float f = *p;
-  __asm__ ("# reg %x0" : "+v" (f));
-  return f;
-}
+  float value00	= 0.0f;
+  float value01	= 0.0f;
+  float value02	= 0.0f;
+  float value03	= 0.0f;
+  float value04	= 0.0f;
+  float value05	= 0.0f;
+  float value06	= 0.0f;
+  float value07	= 0.0f;
+  float value08	= 0.0f;
+  float value09	= 0.0f;
+  float value10	= 0.0f;
+  float value11	= 0.0f;
+  float value12	= 0.0f;
+  float value13	= 0.0f;
+  float value14	= 0.0f;
+  float value15	= 0.0f;
+  float value16	= 0.0f;
+  float value17	= 0.0f;
+  float value18	= 0.0f;
+  float value19	= 0.0f;
+  float value20	= 0.0f;
+  float value21	= 0.0f;
+  float value22	= 0.0f;
+  float value23	= 0.0f;
+  float value24	= 0.0f;
+  float value25	= 0.0f;
+  float value26	= 0.0f;
+  float value27	= 0.0f;
+  float value28	= 0.0f;
+  float value29	= 0.0f;
+  float value30	= 0.0f;
+  float value31	= 0.0f;
+  float value32	= 0.0f;
+  float value33	= 0.0f;
+  float value34	= 0.0f;
+  float value35	= 0.0f;
+  float value36	= 0.0f;
+  float value37	= 0.0f;
+  float value38	= 0.0f;
+  float value39	= 0.0f;
+  unsigned long in_mask;
+  unsigned long out_mask;
+  unsigned long i;
 
-double load_df (double *p)
-{
-  double d = *p;
-  __asm__ ("# reg %x0" : "+v" (d));
-  return d;
-}
+  for (i = 0; i < num; i++)
+    {
+      in_mask = *in_mask_ptr++;
+      if ((in_mask & (1L <<  0)) != 0L)
+	value00 = *from_ptr++;
 
-double load_dfsf (float *p)
-{
-  double d = (double) *p;
-  __asm__ ("# reg %x0" : "+v" (d));
-  return d;
-}
+      if ((in_mask & (1L <<  1)) != 0L)
+	value01 = *from_ptr++;
 
-void store_sf (float *p, float f)
-{
-  __asm__ ("# reg %x0" : "+v" (f));
-  *p = f;
+      if ((in_mask & (1L <<  2)) != 0L)
+	value02 = *from_ptr++;
+
+      if ((in_mask & (1L <<  3)) != 0L)
+	value03 = *from_ptr++;
+
+      if ((in_mask & (1L <<  4)) != 0L)
+	value04 = *from_ptr++;
+
+      if ((in_mask & (1L <<  5)) != 0L)
+	value05 = *from_ptr++;
+
+      if ((in_mask & (1L <<  6)) != 0L)
+	value06 = *from_ptr++;
+
+      if ((in_mask & (1L <<  7)) != 0L)
+	value07 = *from_ptr++;
+
+      if ((in_mask & (1L <<  8)) != 0L)
+	value08 = *from_ptr++;
+
+      if ((in_mask & (1L <<  9)) != 0L)
+	value09 = *from_ptr++;
+
+      if ((in_mask & (1L << 10)) != 0L)
+	value10 = *from_ptr++;
+
+      if ((in_mask & (1L << 11)) != 0L)
+	value11 = *from_ptr++;
+
+      if ((in_mask & (1L << 12)) != 0L)
+	value12 = *from_ptr++;
+
+      if ((in_mask & (1L << 13)) != 0L)
+	value13 = *from_ptr++;
+
+      if ((in_mask & (1L << 14)) != 0L)
+	value14 = *from_ptr++;
+
+      if ((in_mask & (1L << 15)) != 0L)
+	value15 = *from_ptr++;
+
+      if ((in_mask & (1L << 16)) != 0L)
+	value16 = *from_ptr++;
+
+      if ((in_mask & (1L << 17)) != 0L)
+	value17 = *from_ptr++;
+
+      if ((in_mask & (1L << 18)) != 0L)
+	value18 = *from_ptr++;
+
+      if ((in_mask & (1L << 19)) != 0L)
+	value19 = *from_ptr++;
+
+      if ((in_mask & (1L << 20)) != 0L)
+	value20 = *from_ptr++;
+
+      if ((in_mask & (1L << 21)) != 0L)
+	value21 = *from_ptr++;
+
+      if ((in_mask & (1L << 22)) != 0L)
+	value22 = *from_ptr++;
+
+      if ((in_mask & (1L << 23)) != 0L)
+	value23 = *from_ptr++;
+
+      if ((in_mask & (1L << 24)) != 0L)
+	value24 = *from_ptr++;
+
+      if ((in_mask & (1L << 25)) != 0L)
+	value25 = *from_ptr++;
+
+      if ((in_mask & (1L << 26)) != 0L)
+	value26 = *from_ptr++;
+
+      if ((in_mask & (1L << 27)) != 0L)
+	value27 = *from_ptr++;
+
+      if ((in_mask & (1L << 28)) != 0L)
+	value28 = *from_ptr++;
+
+      if ((in_mask & (1L << 29)) != 0L)
+	value29 = *from_ptr++;
+
+      if ((in_mask & (1L << 30)) != 0L)
+	value30 = *from_ptr++;
+
+      if ((in_mask & (1L << 31)) != 0L)
+	value31 = *from_ptr++;
+
+      if ((in_mask & (1L << 32)) != 0L)
+	value32 = *from_ptr++;
+
+      if ((in_mask & (1L << 33)) != 0L)
+	value33 = *from_ptr++;
+
+      if ((in_mask & (1L << 34)) != 0L)
+	value34 = *from_ptr++;
+
+      if ((in_mask & (1L << 35)) != 0L)
+	value35 = *from_ptr++;
+
+      if ((in_mask & (1L << 36)) != 0L)
+	value36 = *from_ptr++;
+
+      if ((in_mask & (1L << 37)) != 0L)
+	value37 = *from_ptr++;
+
+      if ((in_mask & (1L << 38)) != 0L)
+	value38 = *from_ptr++;
+
+      if ((in_mask & (1L << 39)) != 0L)
+	value39 = *from_ptr++;
+
+      out_mask = *out_mask_ptr++;
+      if ((out_mask & (1L <<  0)) != 0L)
+	*to_ptr++ = value00;
+
+      if ((out_mask & (1L <<  1)) != 0L)
+	*to_ptr++ = value01;
+
+      if ((out_mask & (1L <<  2)) != 0L)
+	*to_ptr++ = value02;
+
+      if ((out_mask & (1L <<  3)) != 0L)
+	*to_ptr++ = value03;
+
+      if ((out_mask & (1L <<  4)) != 0L)
+	*to_ptr++ = value04;
+
+      if ((out_mask & (1L <<  5)) != 0L)
+	*to_ptr++ = value05;
+
+      if ((out_mask & (1L <<  6)) != 0L)
+	*to_ptr++ = value06;
+
+      if ((out_mask & (1L <<  7)) != 0L)
+	*to_ptr++ = value07;
+
+      if ((out_mask & (1L <<  8)) != 0L)
+	*to_ptr++ = value08;
+
+      if ((out_mask & (1L <<  9)) != 0L)
+	*to_ptr++ = value09;
+
+      if ((out_mask & (1L << 10)) != 0L)
+	*to_ptr++ = value10;
+
+      if ((out_mask & (1L << 11)) != 0L)
+	*to_ptr++ = value11;
+
+      if ((out_mask & (1L << 12)) != 0L)
+	*to_ptr++ = value12;
+
+      if ((out_mask & (1L << 13)) != 0L)
+	*to_ptr++ = value13;
+
+      if ((out_mask & (1L << 14)) != 0L)
+	*to_ptr++ = value14;
+
+      if ((out_mask & (1L << 15)) != 0L)
+	*to_ptr++ = value15;
+
+      if ((out_mask & (1L << 16)) != 0L)
+	*to_ptr++ = value16;
+
+      if ((out_mask & (1L << 17)) != 0L)
+	*to_ptr++ = value17;
+
+      if ((out_mask & (1L << 18)) != 0L)
+	*to_ptr++ = value18;
+
+      if ((out_mask & (1L << 19)) != 0L)
+	*to_ptr++ = value19;
+
+      if ((out_mask & (1L << 20)) != 0L)
+	*to_ptr++ = value20;
+
+      if ((out_mask & (1L << 21)) != 0L)
+	*to_ptr++ = value21;
+
+      if ((out_mask & (1L << 22)) != 0L)
+	*to_ptr++ = value22;
+
+      if ((out_mask & (1L << 23)) != 0L)
+	*to_ptr++ = value23;
+
+      if ((out_mask & (1L << 24)) != 0L)
+	*to_ptr++ = value24;
+
+      if ((out_mask & (1L << 25)) != 0L)
+	*to_ptr++ = value25;
+
+      if ((out_mask & (1L << 26)) != 0L)
+	*to_ptr++ = value26;
+
+      if ((out_mask & (1L << 27)) != 0L)
+	*to_ptr++ = value27;
+
+      if ((out_mask & (1L << 28)) != 0L)
+	*to_ptr++ = value28;
+
+      if ((out_mask & (1L << 29)) != 0L)
+	*to_ptr++ = value29;
+
+      if ((out_mask & (1L << 30)) != 0L)
+	*to_ptr++ = value30;
+
+      if ((out_mask & (1L << 31)) != 0L)
+	*to_ptr++ = value31;
+
+      if ((out_mask & (1L << 32)) != 0L)
+	*to_ptr++ = value32;
+
+      if ((out_mask & (1L << 33)) != 0L)
+	*to_ptr++ = value33;
+
+      if ((out_mask & (1L << 34)) != 0L)
+	*to_ptr++ = value34;
+
+      if ((out_mask & (1L << 35)) != 0L)
+	*to_ptr++ = value35;
+
+      if ((out_mask & (1L << 36)) != 0L)
+	*to_ptr++ = value36;
+
+      if ((out_mask & (1L << 37)) != 0L)
+	*to_ptr++ = value37;
+
+      if ((out_mask & (1L << 38)) != 0L)
+	*to_ptr++ = value38;
+
+      if ((out_mask & (1L << 39)) != 0L)
+	*to_ptr++ = value39;
+    }
+
+  return (  value00 + value01 + value02 + value03 + value04
+	  + value05 + value06 + value07 + value08 + value09
+	  + value10 + value11 + value12 + value13 + value14
+	  + value15 + value16 + value17 + value18 + value19
+	  + value20 + value21 + value22 + value23 + value24
+	  + value25 + value26 + value27 + value28 + value29
+	  + value30 + value31 + value32 + value33 + value34
+	  + value35 + value36 + value37 + value38 + value39);
 }
 
-void store_df (double *p, double d)
+double
+load_store_df (unsigned long num,
+	       const double *from_ptr,
+	       double *to_ptr,
+	       const unsigned long *in_mask_ptr,
+	       const unsigned long *out_mask_ptr)
 {
-  __asm__ ("# reg %x0" : "+v" (d));
-  *p = d;
+  double value00	= 0.0;
+  double value01	= 0.0;
+  double value02	= 0.0;
+  double value03	= 0.0;
+  double value04	= 0.0;
+  double value05	= 0.0;
+  double value06	= 0.0;
+  double value07	= 0.0;
+  double value08	= 0.0;
+  double value09	= 0.0;
+  double value10	= 0.0;
+  double value11	= 0.0;
+  double value12	= 0.0;
+  double value13	= 0.0;
+  double value14	= 0.0;
+  double value15	= 0.0;
+  double value16	= 0.0;
+  double value17	= 0.0;
+  double value18	= 0.0;
+  double value19	= 0.0;
+  double value20	= 0.0;
+  double value21	= 0.0;
+  double value22	= 0.0;
+  double value23	= 0.0;
+  double value24	= 0.0;
+  double value25	= 0.0;
+  double value26	= 0.0;
+  double value27	= 0.0;
+  double value28	= 0.0;
+  double value29	= 0.0;
+  double value30	= 0.0;
+  double value31	= 0.0;
+  double value32	= 0.0;
+  double value33	= 0.0;
+  double value34	= 0.0;
+  double value35	= 0.0;
+  double value36	= 0.0;
+  double value37	= 0.0;
+  double value38	= 0.0;
+  double value39	= 0.0;
+  unsigned long in_mask;
+  unsigned long out_mask;
+  unsigned long i;
+
+  for (i = 0; i < num; i++)
+    {
+      in_mask = *in_mask_ptr++;
+      if ((in_mask & (1L <<  0)) != 0L)
+	value00 = *from_ptr++;
+
+      if ((in_mask & (1L <<  1)) != 0L)
+	value01 = *from_ptr++;
+
+      if ((in_mask & (1L <<  2)) != 0L)
+	value02 = *from_ptr++;
+
+      if ((in_mask & (1L <<  3)) != 0L)
+	value03 = *from_ptr++;
+
+      if ((in_mask & (1L <<  4)) != 0L)
+	value04 = *from_ptr++;
+
+      if ((in_mask & (1L <<  5)) != 0L)
+	value05 = *from_ptr++;
+
+      if ((in_mask & (1L <<  6)) != 0L)
+	value06 = *from_ptr++;
+
+      if ((in_mask & (1L <<  7)) != 0L)
+	value07 = *from_ptr++;
+
+      if ((in_mask & (1L <<  8)) != 0L)
+	value08 = *from_ptr++;
+
+      if ((in_mask & (1L <<  9)) != 0L)
+	value09 = *from_ptr++;
+
+      if ((in_mask & (1L << 10)) != 0L)
+	value10 = *from_ptr++;
+
+      if ((in_mask & (1L << 11)) != 0L)
+	value11 = *from_ptr++;
+
+      if ((in_mask & (1L << 12)) != 0L)
+	value12 = *from_ptr++;
+
+      if ((in_mask & (1L << 13)) != 0L)
+	value13 = *from_ptr++;
+
+      if ((in_mask & (1L << 14)) != 0L)
+	value14 = *from_ptr++;
+
+      if ((in_mask & (1L << 15)) != 0L)
+	value15 = *from_ptr++;
+
+      if ((in_mask & (1L << 16)) != 0L)
+	value16 = *from_ptr++;
+
+      if ((in_mask & (1L << 17)) != 0L)
+	value17 = *from_ptr++;
+
+      if ((in_mask & (1L << 18)) != 0L)
+	value18 = *from_ptr++;
+
+      if ((in_mask & (1L << 19)) != 0L)
+	value19 = *from_ptr++;
+
+      if ((in_mask & (1L << 20)) != 0L)
+	value20 = *from_ptr++;
+
+      if ((in_mask & (1L << 21)) != 0L)
+	value21 = *from_ptr++;
+
+      if ((in_mask & (1L << 22)) != 0L)
+	value22 = *from_ptr++;
+
+      if ((in_mask & (1L << 23)) != 0L)
+	value23 = *from_ptr++;
+
+      if ((in_mask & (1L << 24)) != 0L)
+	value24 = *from_ptr++;
+
+      if ((in_mask & (1L << 25)) != 0L)
+	value25 = *from_ptr++;
+
+      if ((in_mask & (1L << 26)) != 0L)
+	value26 = *from_ptr++;
+
+      if ((in_mask & (1L << 27)) != 0L)
+	value27 = *from_ptr++;
+
+      if ((in_mask & (1L << 28)) != 0L)
+	value28 = *from_ptr++;
+
+      if ((in_mask & (1L << 29)) != 0L)
+	value29 = *from_ptr++;
+
+      if ((in_mask & (1L << 30)) != 0L)
+	value30 = *from_ptr++;
+
+      if ((in_mask & (1L << 31)) != 0L)
+	value31 = *from_ptr++;
+
+      if ((in_mask & (1L << 32)) != 0L)
+	value32 = *from_ptr++;
+
+      if ((in_mask & (1L << 33)) != 0L)
+	value33 = *from_ptr++;
+
+      if ((in_mask & (1L << 34)) != 0L)
+	value34 = *from_ptr++;
+
+      if ((in_mask & (1L << 35)) != 0L)
+	value35 = *from_ptr++;
+
+      if ((in_mask & (1L << 36)) != 0L)
+	value36 = *from_ptr++;
+
+      if ((in_mask & (1L << 37)) != 0L)
+	value37 = *from_ptr++;
+
+      if ((in_mask & (1L << 38)) != 0L)
+	value38 = *from_ptr++;
+
+      if ((in_mask & (1L << 39)) != 0L)
+	value39 = *from_ptr++;
+
+      out_mask = *out_mask_ptr++;
+      if ((out_mask & (1L <<  0)) != 0L)
+	*to_ptr++ = value00;
+
+      if ((out_mask & (1L <<  1)) != 0L)
+	*to_ptr++ = value01;
+
+      if ((out_mask & (1L <<  2)) != 0L)
+	*to_ptr++ = value02;
+
+      if ((out_mask & (1L <<  3)) != 0L)
+	*to_ptr++ = value03;
+
+      if ((out_mask & (1L <<  4)) != 0L)
+	*to_ptr++ = value04;
+
+      if ((out_mask & (1L <<  5)) != 0L)
+	*to_ptr++ = value05;
+
+      if ((out_mask & (1L <<  6)) != 0L)
+	*to_ptr++ = value06;
+
+      if ((out_mask & (1L <<  7)) != 0L)
+	*to_ptr++ = value07;
+
+      if ((out_mask & (1L <<  8)) != 0L)
+	*to_ptr++ = value08;
+
+      if ((out_mask & (1L <<  9)) != 0L)
+	*to_ptr++ = value09;
+
+      if ((out_mask & (1L << 10)) != 0L)
+	*to_ptr++ = value10;
+
+      if ((out_mask & (1L << 11)) != 0L)
+	*to_ptr++ = value11;
+
+      if ((out_mask & (1L << 12)) != 0L)
+	*to_ptr++ = value12;
+
+      if ((out_mask & (1L << 13)) != 0L)
+	*to_ptr++ = value13;
+
+      if ((out_mask & (1L << 14)) != 0L)
+	*to_ptr++ = value14;
+
+      if ((out_mask & (1L << 15)) != 0L)
+	*to_ptr++ = value15;
+
+      if ((out_mask & (1L << 16)) != 0L)
+	*to_ptr++ = value16;
+
+      if ((out_mask & (1L << 17)) != 0L)
+	*to_ptr++ = value17;
+
+      if ((out_mask & (1L << 18)) != 0L)
+	*to_ptr++ = value18;
+
+      if ((out_mask & (1L << 19)) != 0L)
+	*to_ptr++ = value19;
+
+      if ((out_mask & (1L << 20)) != 0L)
+	*to_ptr++ = value20;
+
+      if ((out_mask & (1L << 21)) != 0L)
+	*to_ptr++ = value21;
+
+      if ((out_mask & (1L << 22)) != 0L)
+	*to_ptr++ = value22;
+
+      if ((out_mask & (1L << 23)) != 0L)
+	*to_ptr++ = value23;
+
+      if ((out_mask & (1L << 24)) != 0L)
+	*to_ptr++ = value24;
+
+      if ((out_mask & (1L << 25)) != 0L)
+	*to_ptr++ = value25;
+
+      if ((out_mask & (1L << 26)) != 0L)
+	*to_ptr++ = value26;
+
+      if ((out_mask & (1L << 27)) != 0L)
+	*to_ptr++ = value27;
+
+      if ((out_mask & (1L << 28)) != 0L)
+	*to_ptr++ = value28;
+
+      if ((out_mask & (1L << 29)) != 0L)
+	*to_ptr++ = value29;
+
+      if ((out_mask & (1L << 30)) != 0L)
+	*to_ptr++ = value30;
+
+      if ((out_mask & (1L << 31)) != 0L)
+	*to_ptr++ = value31;
+
+      if ((out_mask & (1L << 32)) != 0L)
+	*to_ptr++ = value32;
+
+      if ((out_mask & (1L << 33)) != 0L)
+	*to_ptr++ = value33;
+
+      if ((out_mask & (1L << 34)) != 0L)
+	*to_ptr++ = value34;
+
+      if ((out_mask & (1L << 35)) != 0L)
+	*to_ptr++ = value35;
+
+      if ((out_mask & (1L << 36)) != 0L)
+	*to_ptr++ = value36;
+
+      if ((out_mask & (1L << 37)) != 0L)
+	*to_ptr++ = value37;
+
+      if ((out_mask & (1L << 38)) != 0L)
+	*to_ptr++ = value38;
+
+      if ((out_mask & (1L << 39)) != 0L)
+	*to_ptr++ = value39;
+    }
+
+  return (  value00 + value01 + value02 + value03 + value04
+	  + value05 + value06 + value07 + value08 + value09
+	  + value10 + value11 + value12 + value13 + value14
+	  + value15 + value16 + value17 + value18 + value19
+	  + value20 + value21 + value22 + value23 + value24
+	  + value25 + value26 + value27 + value28 + value29
+	  + value30 + value31 + value32 + value33 + value34
+	  + value35 + value36 + value37 + value38 + value39);
 }
 
 /* { dg-final { scan-assembler "lxsspx"  } } */
 /* { dg-final { scan-assembler "lxsdx"   } } */
 /* { dg-final { scan-assembler "stxsspx" } } */
 /* { dg-final { scan-assembler "stxsdx"  } } */
+/* { dg-final { scan-assembler "xsaddsp" } } */
+/* { dg-final { scan-assembler "xsadddp" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c
index 8a6cc08b909..ea20f60e1a9 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-1.c
@@ -1,13 +1,16 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mcpu=power7 -ffast-math" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* { dg-options "-O2 -mcpu=power7 -ffast-math -mno-upper-regs-df" } */
 /* { dg-final { scan-assembler-times "lfiwax" 2 } } */
 /* { dg-final { scan-assembler-times "lfiwzx" 2 } } */
-/* { dg-final { scan-assembler-times "fcfids" 3 } } */
-/* { dg-final { scan-assembler-times "fcfidus" 1 } } */
-/* { dg-final { scan-assembler-times "xscvsxddp" 3 } } */
-/* { dg-final { scan-assembler-times "xscvuxddp" 1 } } */
+/* { dg-final { scan-assembler-times "fcfids " 3 } } */
+/* { dg-final { scan-assembler-times "fcfidus " 1 } } */
+/* { dg-final { scan-assembler-times "fcfid " 3 } } */
+/* { dg-final { scan-assembler-times "fcfidu " 1 } } */
+/* { dg-final { scan-assembler-not "xscvdpsxds" } } */
+/* { dg-final { scan-assembler-not "xscvdpuxds" } } */
 
 void int_to_float (float *dest, int *src)
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-10.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-10.c
index 59ba5f91f48..11628c91840 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-10.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-10.c
@@ -1,9 +1,9 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mcpu=power7 -ffast-math" } */
-/* { dg-final { scan-assembler "xsrdpiz" } } */
-/* { dg-final { scan-assembler-not "friz" } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* { dg-options "-O2 -mcpu=power7 -ffast-math -mno-upper-regs-df" } */
+/* { dg-final { scan-assembler "friz" } } */
 
 double round_double_llong (double a)
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c
index e0a83422593..572dec628d6 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-2.c
@@ -1,12 +1,14 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */
 /* { dg-options "-O2 -mcpu=power6 -ffast-math" } */
 /* { dg-final { scan-assembler-times "lfiwax" 2 } } */
 /* { dg-final { scan-assembler-not "lfiwzx" } } */
 /* { dg-final { scan-assembler-times "fcfid " 10 } } */
-/* { dg-final { scan-assembler-not "fcfids" } } */
-/* { dg-final { scan-assembler-not "fcfidus" } } */
+/* { dg-final { scan-assembler-not "fcfids " } } */
+/* { dg-final { scan-assembler-not "fcfidus " } } */
+/* { dg-final { scan-assembler-not "fcfidu " } } */
 /* { dg-final { scan-assembler-not "xscvsxddp" } } */
 /* { dg-final { scan-assembler-not "xscvuxddp" } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c
index bf12113d28c..984d3f3f07c 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-3.c
@@ -2,14 +2,16 @@
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target ilp32 } */
 /* { dg-require-effective-target powerpc_fprs } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */
 /* { dg-options "-O2 -mcpu=power5 -ffast-math" } */
 /* { dg-final { scan-assembler-not "lfiwax" } } */
 /* { dg-final { scan-assembler-not "lfiwzx" } } */
 /* { dg-final { scan-assembler-times "fcfid " 10 } } */
-/* { dg-final { scan-assembler-not "fcfids" } } */
-/* { dg-final { scan-assembler-not "fcfidus" } } */
-/* { dg-final { scan-assembler-not "xscvsxddp" } } */
-/* { dg-final { scan-assembler-not "xscvuxddp" } } */
+/* { dg-final { scan-assembler-not "fcfids " } } */
+/* { dg-final { scan-assembler-not "fcfidus " } } */
+/* { dg-final { scan-assembler-not "fcfidu " } } */
+/* { dg-final { scan-assembler-not "xscvsxddp " } } */
+/* { dg-final { scan-assembler-not "xscvuxddp " } } */
 
 void int_to_float (float *dest, int *src)
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c
index c4b9ea69bf0..dc1f710321a 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-4.c
@@ -1,14 +1,16 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target ilp32 } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=750" } } */
 /* { dg-options "-O2 -mcpu=750 -ffast-math" } */
 /* { dg-final { scan-assembler-not "lfiwax" } } */
 /* { dg-final { scan-assembler-not "lfiwzx" } } */
 /* { dg-final { scan-assembler-not "fcfid " } } */
-/* { dg-final { scan-assembler-not "fcfids" } } */
-/* { dg-final { scan-assembler-not "fcfidus" } } */
-/* { dg-final { scan-assembler-not "xscvsxddp" } } */
-/* { dg-final { scan-assembler-not "xscvuxddp" } } */
+/* { dg-final { scan-assembler-not "fcfids " } } */
+/* { dg-final { scan-assembler-not "fcfidus " } } */
+/* { dg-final { scan-assembler-not "fcfidu " } } */
+/* { dg-final { scan-assembler-not "xscvsxddp " } } */
+/* { dg-final { scan-assembler-not "xscvuxddp " } } */
 
 void int_to_float (float *dest, int *src)
 {
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-5.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-5.c
index a071fc12292..c44eb08231c 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-5.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-5.c
@@ -1,13 +1,14 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O3 -mcpu=power7 -ffast-math" } */
-/* { dg-final { scan-assembler-times "fctiwz" 2 } } */
-/* { dg-final { scan-assembler-times "fctiwuz" 2 } } */
-/* { dg-final { scan-assembler-times "fctidz" 1 } } */
-/* { dg-final { scan-assembler-times "fctiduz" 1 } } */
-/* { dg-final { scan-assembler-times "xscvdpsxds" 1 } } */
-/* { dg-final { scan-assembler-times "xscvdpuxds" 1 } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* { dg-options "-O3 -mcpu=power7 -ffast-math -mno-upper-regs-df" } */
+/* { dg-final { scan-assembler-times "fctiwz " 2 } } */
+/* { dg-final { scan-assembler-times "fctiwuz " 2 } } */
+/* { dg-final { scan-assembler-times "fctidz " 2 } } */
+/* { dg-final { scan-assembler-times "fctiduz " 2 } } */
+/* { dg-final { scan-assembler-not "xscvdpsxds" } } */
+/* { dg-final { scan-assembler-not "xscvdpuxds" } } */
 
 void float_to_int  (int *dest, float  src) { *dest = (int) src; }
 void double_to_int (int *dest, double src) { *dest = (int) src; }
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-6.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-6.c
index 09ee1885a17..5282a5a4293 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-6.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-6.c
@@ -1,11 +1,13 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */
 /* { dg-options "-O3 -mcpu=power6 -ffast-math" } */
-/* { dg-final { scan-assembler-times "fctiwz" 2 } } */
-/* { dg-final { scan-assembler-not "fctiwuz" } } */
-/* { dg-final { scan-assembler-times "fctidz" 8 } } */
-/* { dg-final { scan-assembler-not "fctiduz" } } */
+/* { dg-final { scan-assembler-times "fctiwz " 2 } } */
+/* { dg-final { scan-assembler-not "fctiwuz " } } */
+/* { dg-final { scan-assembler-times "fctidz " 8 } } */
+/* { dg-final { scan-assembler-not "fctiduz " } } */
+/* { dg-final { scan-assembler-not "fctidu " } } */
 /* { dg-final { scan-assembler-not "xscvdpsxds" } } */
 /* { dg-final { scan-assembler-not "xscvdpuxds" } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-7.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-7.c
index 808cbc39078..fa0b50edb30 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-7.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-7.c
@@ -2,11 +2,13 @@
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target ilp32 } */
 /* { dg-require-effective-target powerpc_fprs } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */
 /* { dg-options "-O3 -mcpu=power5 -ffast-math" } */
-/* { dg-final { scan-assembler-times "fctiwz" 2 } } */
-/* { dg-final { scan-assembler-not "fctiwuz" } } */
-/* { dg-final { scan-assembler-times "fctidz" 8 } } */
-/* { dg-final { scan-assembler-not "fctiduz" } } */
+/* { dg-final { scan-assembler-times "fctiwz " 2 } } */
+/* { dg-final { scan-assembler-not "fctiwuz " } } */
+/* { dg-final { scan-assembler-times "fctidz " 8 } } */
+/* { dg-final { scan-assembler-not "fctiduz " } } */
+/* { dg-final { scan-assembler-not "fctidu " } } */
 /* { dg-final { scan-assembler-not "xscvdpsxds" } } */
 /* { dg-final { scan-assembler-not "xscvdpuxds" } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-8.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-8.c
index f841d7ee073..5f1bb23c50e 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-8.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-8.c
@@ -2,11 +2,13 @@
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target ilp32 } */
 /* { dg-require-effective-target powerpc_fprs } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=750" } } */
 /* { dg-options "-O3 -mcpu=750 -ffast-math" } */
-/* { dg-final { scan-assembler-times "fctiwz" 6 } } */
-/* { dg-final { scan-assembler-not "fctiwuz" } } */
-/* { dg-final { scan-assembler-not "fctidz" } } */
-/* { dg-final { scan-assembler-not "fctiduz" } } */
+/* { dg-final { scan-assembler-times "fctiwz " 6 } } */
+/* { dg-final { scan-assembler-not "fctiwuz " } } */
+/* { dg-final { scan-assembler-not "fctidz " } } */
+/* { dg-final { scan-assembler-not "fctiduz " } } */
+/* { dg-final { scan-assembler-not "fctidu " } } */
 /* { dg-final { scan-assembler-not "xscvdpsxds" } } */
 /* { dg-final { scan-assembler-not "xscvdpuxds" } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-9.c b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-9.c
index 836c030baa6..62ead0a9e4f 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-9.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fpconv-9.c
@@ -1,7 +1,9 @@
 /* { dg-do compile { target { powerpc*-*-* } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O3 -mcpu=power7 -ffast-math" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* { dg-options "-O3 -mcpu=power7 -ffast-math -mno-upper-regs-df" } */
+/* { dg-final { scan-assembler-times "fctidz" 2 } } */
 /* { dg-final { scan-assembler-not "lwz" } } */
 /* { dg-final { scan-assembler-not "stw" } } */
 /* { dg-final { scan-assembler-not "ld " } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-round.c b/gcc/testsuite/gcc.target/powerpc/ppc-round.c
index 20262aa449a..4fc1679622d 100644
--- a/gcc/testsuite/gcc.target/powerpc/ppc-round.c
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-round.c
@@ -1,13 +1,14 @@
 /* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-O2 -mcpu=power7" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* { dg-options "-O2 -mcpu=power7 -mno-upper-regs-df" } */
 /* { dg-final { scan-assembler-times "stfiwx" 4 } } */
 /* { dg-final { scan-assembler-times "lfiwax" 2 } } */
 /* { dg-final { scan-assembler-times "lfiwzx" 2 } } */
-/* { dg-final { scan-assembler-times "fctiwz" 2 } } */
-/* { dg-final { scan-assembler-times "xscvsxddp" 2 } } */
-/* { dg-final { scan-assembler-times "fcfids" 2 } } */
+/* { dg-final { scan-assembler-times "fctiwz " 2 } } */
+/* { dg-final { scan-assembler-times "fctiwuz " 2 } } */
+/* { dg-final { scan-assembler-times "fcfids " 2 } } */
 /* { dg-final { scan-assembler-not "lwz" } } */
 /* { dg-final { scan-assembler-not "stw" } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr64019.c b/gcc/testsuite/gcc.target/powerpc/pr64019.c
new file mode 100644
index 00000000000..a39b2191798
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr64019.c
@@ -0,0 +1,71 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* { dg-options "-O2 -ffast-math -mcpu=power7" } */
+
+#include <math.h>
+
+typedef struct
+{
+  double x, y, z;		/* Base coordinates read by u_f_nonbon.  */
+  double q, a, b, mass;		/* q, a, b feed k there; mass is unused.  */
+  double vx, vy, vz, vw, dx, dy, dz; /* Only dx, dy, dz are read there.  */
+}
+ATOM;
+int				/* NOTE(review): body never returns a value.  */
+u_f_nonbon (lambda)		/* Old-style (K&R) parameter list.  */
+     double lambda;
+{				/* Presumably a reduced testcase; TODO confirm.  */
+  double r, r0, xt, yt, zt;
+  double lcutoff, cutoff, get_f_variable ();
+  double rdebye;
+  int inbond, inangle, i;
+  ATOM *a1, *a2, *bonded[10], *angled[10];
+  ATOM *(*use)[];
+  int uselist (), nuse, used;
+  ATOM *cp, *bp;
+  int a_number (), inbuffer;
+  double (*buffer)[], xx, yy, zz, k;
+  int invector, atomsused, ii, jj, imax;
+  double (*vector)[];
+  ATOM *(*atms)[];
+  double dielectric;
+  rdebye = cutoff / 2.;		/* cutoff has no prior assignment.  */
+  dielectric = get_f_variable ("dielec");
+  imax = a_number ();
+  for (jj = 1; jj < imax; jj++, a1 = bp) /* bp is never assigned.  */
+    {
+      if ((*use)[used] == a1)	/* use and used have no initial value.  */
+	{
+	  used += 1;
+	}
+      while ((*use)[used] != a1) /* Body changes none of use, used, a1.  */
+	{
+	  for (i = 0; i < inbuffer; i++) /* Empty body; inbuffer is unset.  */
+	    {
+	    }
+	  xx = a1->x + lambda * a1->dx;
+	  yy = a1->y + lambda * a1->dy;
+	  zz = a1->z + lambda * a1->dz;
+	  for (i = 0; i < inbuffer; i++)
+	    {
+	      xt = xx - (*buffer)[3 * i];
+	      yt = yy - (*buffer)[3 * i + 1];
+	      zt = zz - (*buffer)[3 * i + 2];
+	      r = xt * xt + yt * yt + zt * zt;	/* Squared length.  */
+	      r0 = sqrt (r);
+	      xt = xt / r0;
+	      zt = zt / r0;	/* yt is not divided by r0.  */
+	      k =
+		-a1->q * (*atms)[i]->q * dielectric * exp (-r0 / rdebye) *
+		(1. / (rdebye * r0) + 1. / r);
+	      k += a1->a * (*atms)[i]->a / r / r0 * 6;
+	      k -= a1->b * (*atms)[i]->b / r / r / r0 * 12;
+	      (*vector)[3 * i] = xt * k;	/* Three outputs per i.  */
+	      (*vector)[3 * i + 1] = yt * k;
+	      (*vector)[3 * i + 2] = zt * k;
+	    }
+	}
+    }
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/upper-regs-df.c b/gcc/testsuite/gcc.target/powerpc/upper-regs-df.c
new file mode 100644
index 00000000000..e3a284ca0da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/upper-regs-df.c
@@ -0,0 +1,726 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* { dg-options "-mcpu=power7 -O2 -mupper-regs-df" } */
+
+/* Test for the -mupper-regs-df option to make sure double values are allocated
+   to the Altivec registers as well as the traditional FPR registers.  */
+
+#ifndef TYPE
+#define TYPE double		/* Used unless TYPE is already defined.  */
+#endif
+
+#ifndef MASK_TYPE
+#define MASK_TYPE unsigned long long /* Likewise for MASK_TYPE.  */
+#endif
+
+#define MASK_ONE	((MASK_TYPE)1)	/* Shifted left by 0..39 in test_add.  */
+#define ZERO		((TYPE) 0.0)	/* Initial value and comparison operand.  */
+
+TYPE				/* Returns value00 + ... + value39.  */
+test_add (const MASK_TYPE *add_mask, const TYPE *add_values,
+	  const MASK_TYPE *sub_mask, const TYPE *sub_values,
+	  const MASK_TYPE *mul_mask, const TYPE *mul_values,
+	  const MASK_TYPE *div_mask, const TYPE *div_values,
+	  const MASK_TYPE *eq0_mask, int *eq0_ptr)
+{
+  TYPE value;			/* Operand fetched for the current mask.  */
+  TYPE value00	= ZERO;		/* value00..value39: 40 accumulators,  */
+  TYPE value01	= ZERO;		/* presumably more than fit in the FPRs  */
+  TYPE value02	= ZERO;		/* alone -- TODO confirm intent.  */
+  TYPE value03	= ZERO;
+  TYPE value04	= ZERO;
+  TYPE value05	= ZERO;
+  TYPE value06	= ZERO;
+  TYPE value07	= ZERO;
+  TYPE value08	= ZERO;
+  TYPE value09	= ZERO;
+  TYPE value10	= ZERO;
+  TYPE value11	= ZERO;
+  TYPE value12	= ZERO;
+  TYPE value13	= ZERO;
+  TYPE value14	= ZERO;
+  TYPE value15	= ZERO;
+  TYPE value16	= ZERO;
+  TYPE value17	= ZERO;
+  TYPE value18	= ZERO;
+  TYPE value19	= ZERO;
+  TYPE value20	= ZERO;
+  TYPE value21	= ZERO;
+  TYPE value22	= ZERO;
+  TYPE value23	= ZERO;
+  TYPE value24	= ZERO;
+  TYPE value25	= ZERO;
+  TYPE value26	= ZERO;
+  TYPE value27	= ZERO;
+  TYPE value28	= ZERO;
+  TYPE value29	= ZERO;
+  TYPE value30	= ZERO;
+  TYPE value31	= ZERO;
+  TYPE value32	= ZERO;
+  TYPE value33	= ZERO;
+  TYPE value34	= ZERO;
+  TYPE value35	= ZERO;
+  TYPE value36	= ZERO;
+  TYPE value37	= ZERO;
+  TYPE value38	= ZERO;
+  TYPE value39	= ZERO;
+  MASK_TYPE mask;		/* Bit N selects accumulator valueNN.  */
+  int eq0;			/* OR of the selected valueNN == ZERO tests.  */
+
+  while ((mask = *add_mask++) != 0)	/* A 0 mask word ends the list.  */
+    {
+      value = *add_values++;
+
+      __asm__ (" #reg %0" : "+d" (value)); /* "+d" = read-write operand.  */
+
+      if ((mask & (MASK_ONE <<  0)) != 0)
+	value00 += value;
+
+      if ((mask & (MASK_ONE <<  1)) != 0)
+	value01 += value;
+
+      if ((mask & (MASK_ONE <<  2)) != 0)
+	value02 += value;
+
+      if ((mask & (MASK_ONE <<  3)) != 0)
+	value03 += value;
+
+      if ((mask & (MASK_ONE <<  4)) != 0)
+	value04 += value;
+
+      if ((mask & (MASK_ONE <<  5)) != 0)
+	value05 += value;
+
+      if ((mask & (MASK_ONE <<  6)) != 0)
+	value06 += value;
+
+      if ((mask & (MASK_ONE <<  7)) != 0)
+	value07 += value;
+
+      if ((mask & (MASK_ONE <<  8)) != 0)
+	value08 += value;
+
+      if ((mask & (MASK_ONE <<  9)) != 0)
+	value09 += value;
+
+      if ((mask & (MASK_ONE << 10)) != 0)
+	value10 += value;
+
+      if ((mask & (MASK_ONE << 11)) != 0)
+	value11 += value;
+
+      if ((mask & (MASK_ONE << 12)) != 0)
+	value12 += value;
+
+      if ((mask & (MASK_ONE << 13)) != 0)
+	value13 += value;
+
+      if ((mask & (MASK_ONE << 14)) != 0)
+	value14 += value;
+
+      if ((mask & (MASK_ONE << 15)) != 0)
+	value15 += value;
+
+      if ((mask & (MASK_ONE << 16)) != 0)
+	value16 += value;
+
+      if ((mask & (MASK_ONE << 17)) != 0)
+	value17 += value;
+
+      if ((mask & (MASK_ONE << 18)) != 0)
+	value18 += value;
+
+      if ((mask & (MASK_ONE << 19)) != 0)
+	value19 += value;
+
+      if ((mask & (MASK_ONE << 20)) != 0)
+	value20 += value;
+
+      if ((mask & (MASK_ONE << 21)) != 0)
+	value21 += value;
+
+      if ((mask & (MASK_ONE << 22)) != 0)
+	value22 += value;
+
+      if ((mask & (MASK_ONE << 23)) != 0)
+	value23 += value;
+
+      if ((mask & (MASK_ONE << 24)) != 0)
+	value24 += value;
+
+      if ((mask & (MASK_ONE << 25)) != 0)
+	value25 += value;
+
+      if ((mask & (MASK_ONE << 26)) != 0)
+	value26 += value;
+
+      if ((mask & (MASK_ONE << 27)) != 0)
+	value27 += value;
+
+      if ((mask & (MASK_ONE << 28)) != 0)
+	value28 += value;
+
+      if ((mask & (MASK_ONE << 29)) != 0)
+	value29 += value;
+
+      if ((mask & (MASK_ONE << 30)) != 0)
+	value30 += value;
+
+      if ((mask & (MASK_ONE << 31)) != 0)
+	value31 += value;
+
+      if ((mask & (MASK_ONE << 32)) != 0)
+	value32 += value;
+
+      if ((mask & (MASK_ONE << 33)) != 0)
+	value33 += value;
+
+      if ((mask & (MASK_ONE << 34)) != 0)
+	value34 += value;
+
+      if ((mask & (MASK_ONE << 35)) != 0)
+	value35 += value;
+
+      if ((mask & (MASK_ONE << 36)) != 0)
+	value36 += value;
+
+      if ((mask & (MASK_ONE << 37)) != 0)
+	value37 += value;
+
+      if ((mask & (MASK_ONE << 38)) != 0)
+	value38 += value;
+
+      if ((mask & (MASK_ONE << 39)) != 0)
+	value39 += value;
+    }
+
+  while ((mask = *sub_mask++) != 0)	/* Likewise, subtracting.  */
+    {
+      value = *sub_values++;
+
+      __asm__ (" #reg %0" : "+d" (value));
+
+      if ((mask & (MASK_ONE <<  0)) != 0)
+	value00 -= value;
+
+      if ((mask & (MASK_ONE <<  1)) != 0)
+	value01 -= value;
+
+      if ((mask & (MASK_ONE <<  2)) != 0)
+	value02 -= value;
+
+      if ((mask & (MASK_ONE <<  3)) != 0)
+	value03 -= value;
+
+      if ((mask & (MASK_ONE <<  4)) != 0)
+	value04 -= value;
+
+      if ((mask & (MASK_ONE <<  5)) != 0)
+	value05 -= value;
+
+      if ((mask & (MASK_ONE <<  6)) != 0)
+	value06 -= value;
+
+      if ((mask & (MASK_ONE <<  7)) != 0)
+	value07 -= value;
+
+      if ((mask & (MASK_ONE <<  8)) != 0)
+	value08 -= value;
+
+      if ((mask & (MASK_ONE <<  9)) != 0)
+	value09 -= value;
+
+      if ((mask & (MASK_ONE << 10)) != 0)
+	value10 -= value;
+
+      if ((mask & (MASK_ONE << 11)) != 0)
+	value11 -= value;
+
+      if ((mask & (MASK_ONE << 12)) != 0)
+	value12 -= value;
+
+      if ((mask & (MASK_ONE << 13)) != 0)
+	value13 -= value;
+
+      if ((mask & (MASK_ONE << 14)) != 0)
+	value14 -= value;
+
+      if ((mask & (MASK_ONE << 15)) != 0)
+	value15 -= value;
+
+      if ((mask & (MASK_ONE << 16)) != 0)
+	value16 -= value;
+
+      if ((mask & (MASK_ONE << 17)) != 0)
+	value17 -= value;
+
+      if ((mask & (MASK_ONE << 18)) != 0)
+	value18 -= value;
+
+      if ((mask & (MASK_ONE << 19)) != 0)
+	value19 -= value;
+
+      if ((mask & (MASK_ONE << 20)) != 0)
+	value20 -= value;
+
+      if ((mask & (MASK_ONE << 21)) != 0)
+	value21 -= value;
+
+      if ((mask & (MASK_ONE << 22)) != 0)
+	value22 -= value;
+
+      if ((mask & (MASK_ONE << 23)) != 0)
+	value23 -= value;
+
+      if ((mask & (MASK_ONE << 24)) != 0)
+	value24 -= value;
+
+      if ((mask & (MASK_ONE << 25)) != 0)
+	value25 -= value;
+
+      if ((mask & (MASK_ONE << 26)) != 0)
+	value26 -= value;
+
+      if ((mask & (MASK_ONE << 27)) != 0)
+	value27 -= value;
+
+      if ((mask & (MASK_ONE << 28)) != 0)
+	value28 -= value;
+
+      if ((mask & (MASK_ONE << 29)) != 0)
+	value29 -= value;
+
+      if ((mask & (MASK_ONE << 30)) != 0)
+	value30 -= value;
+
+      if ((mask & (MASK_ONE << 31)) != 0)
+	value31 -= value;
+
+      if ((mask & (MASK_ONE << 32)) != 0)
+	value32 -= value;
+
+      if ((mask & (MASK_ONE << 33)) != 0)
+	value33 -= value;
+
+      if ((mask & (MASK_ONE << 34)) != 0)
+	value34 -= value;
+
+      if ((mask & (MASK_ONE << 35)) != 0)
+	value35 -= value;
+
+      if ((mask & (MASK_ONE << 36)) != 0)
+	value36 -= value;
+
+      if ((mask & (MASK_ONE << 37)) != 0)
+	value37 -= value;
+
+      if ((mask & (MASK_ONE << 38)) != 0)
+	value38 -= value;
+
+      if ((mask & (MASK_ONE << 39)) != 0)
+	value39 -= value;
+    }
+
+  while ((mask = *mul_mask++) != 0)	/* Likewise, multiplying.  */
+    {
+      value = *mul_values++;
+
+      __asm__ (" #reg %0" : "+d" (value));
+
+      if ((mask & (MASK_ONE <<  0)) != 0)
+	value00 *= value;
+
+      if ((mask & (MASK_ONE <<  1)) != 0)
+	value01 *= value;
+
+      if ((mask & (MASK_ONE <<  2)) != 0)
+	value02 *= value;
+
+      if ((mask & (MASK_ONE <<  3)) != 0)
+	value03 *= value;
+
+      if ((mask & (MASK_ONE <<  4)) != 0)
+	value04 *= value;
+
+      if ((mask & (MASK_ONE <<  5)) != 0)
+	value05 *= value;
+
+      if ((mask & (MASK_ONE <<  6)) != 0)
+	value06 *= value;
+
+      if ((mask & (MASK_ONE <<  7)) != 0)
+	value07 *= value;
+
+      if ((mask & (MASK_ONE <<  8)) != 0)
+	value08 *= value;
+
+      if ((mask & (MASK_ONE <<  9)) != 0)
+	value09 *= value;
+
+      if ((mask & (MASK_ONE << 10)) != 0)
+	value10 *= value;
+
+      if ((mask & (MASK_ONE << 11)) != 0)
+	value11 *= value;
+
+      if ((mask & (MASK_ONE << 12)) != 0)
+	value12 *= value;
+
+      if ((mask & (MASK_ONE << 13)) != 0)
+	value13 *= value;
+
+      if ((mask & (MASK_ONE << 14)) != 0)
+	value14 *= value;
+
+      if ((mask & (MASK_ONE << 15)) != 0)
+	value15 *= value;
+
+      if ((mask & (MASK_ONE << 16)) != 0)
+	value16 *= value;
+
+      if ((mask & (MASK_ONE << 17)) != 0)
+	value17 *= value;
+
+      if ((mask & (MASK_ONE << 18)) != 0)
+	value18 *= value;
+
+      if ((mask & (MASK_ONE << 19)) != 0)
+	value19 *= value;
+
+      if ((mask & (MASK_ONE << 20)) != 0)
+	value20 *= value;
+
+      if ((mask & (MASK_ONE << 21)) != 0)
+	value21 *= value;
+
+      if ((mask & (MASK_ONE << 22)) != 0)
+	value22 *= value;
+
+      if ((mask & (MASK_ONE << 23)) != 0)
+	value23 *= value;
+
+      if ((mask & (MASK_ONE << 24)) != 0)
+	value24 *= value;
+
+      if ((mask & (MASK_ONE << 25)) != 0)
+	value25 *= value;
+
+      if ((mask & (MASK_ONE << 26)) != 0)
+	value26 *= value;
+
+      if ((mask & (MASK_ONE << 27)) != 0)
+	value27 *= value;
+
+      if ((mask & (MASK_ONE << 28)) != 0)
+	value28 *= value;
+
+      if ((mask & (MASK_ONE << 29)) != 0)
+	value29 *= value;
+
+      if ((mask & (MASK_ONE << 30)) != 0)
+	value30 *= value;
+
+      if ((mask & (MASK_ONE << 31)) != 0)
+	value31 *= value;
+
+      if ((mask & (MASK_ONE << 32)) != 0)
+	value32 *= value;
+
+      if ((mask & (MASK_ONE << 33)) != 0)
+	value33 *= value;
+
+      if ((mask & (MASK_ONE << 34)) != 0)
+	value34 *= value;
+
+      if ((mask & (MASK_ONE << 35)) != 0)
+	value35 *= value;
+
+      if ((mask & (MASK_ONE << 36)) != 0)
+	value36 *= value;
+
+      if ((mask & (MASK_ONE << 37)) != 0)
+	value37 *= value;
+
+      if ((mask & (MASK_ONE << 38)) != 0)
+	value38 *= value;
+
+      if ((mask & (MASK_ONE << 39)) != 0)
+	value39 *= value;
+    }
+
+  while ((mask = *div_mask++) != 0)	/* Likewise, dividing.  */
+    {
+      value = *div_values++;
+
+      __asm__ (" #reg %0" : "+d" (value));
+
+      if ((mask & (MASK_ONE <<  0)) != 0)
+	value00 /= value;
+
+      if ((mask & (MASK_ONE <<  1)) != 0)
+	value01 /= value;
+
+      if ((mask & (MASK_ONE <<  2)) != 0)
+	value02 /= value;
+
+      if ((mask & (MASK_ONE <<  3)) != 0)
+	value03 /= value;
+
+      if ((mask & (MASK_ONE <<  4)) != 0)
+	value04 /= value;
+
+      if ((mask & (MASK_ONE <<  5)) != 0)
+	value05 /= value;
+
+      if ((mask & (MASK_ONE <<  6)) != 0)
+	value06 /= value;
+
+      if ((mask & (MASK_ONE <<  7)) != 0)
+	value07 /= value;
+
+      if ((mask & (MASK_ONE <<  8)) != 0)
+	value08 /= value;
+
+      if ((mask & (MASK_ONE <<  9)) != 0)
+	value09 /= value;
+
+      if ((mask & (MASK_ONE << 10)) != 0)
+	value10 /= value;
+
+      if ((mask & (MASK_ONE << 11)) != 0)
+	value11 /= value;
+
+      if ((mask & (MASK_ONE << 12)) != 0)
+	value12 /= value;
+
+      if ((mask & (MASK_ONE << 13)) != 0)
+	value13 /= value;
+
+      if ((mask & (MASK_ONE << 14)) != 0)
+	value14 /= value;
+
+      if ((mask & (MASK_ONE << 15)) != 0)
+	value15 /= value;
+
+      if ((mask & (MASK_ONE << 16)) != 0)
+	value16 /= value;
+
+      if ((mask & (MASK_ONE << 17)) != 0)
+	value17 /= value;
+
+      if ((mask & (MASK_ONE << 18)) != 0)
+	value18 /= value;
+
+      if ((mask & (MASK_ONE << 19)) != 0)
+	value19 /= value;
+
+      if ((mask & (MASK_ONE << 20)) != 0)
+	value20 /= value;
+
+      if ((mask & (MASK_ONE << 21)) != 0)
+	value21 /= value;
+
+      if ((mask & (MASK_ONE << 22)) != 0)
+	value22 /= value;
+
+      if ((mask & (MASK_ONE << 23)) != 0)
+	value23 /= value;
+
+      if ((mask & (MASK_ONE << 24)) != 0)
+	value24 /= value;
+
+      if ((mask & (MASK_ONE << 25)) != 0)
+	value25 /= value;
+
+      if ((mask & (MASK_ONE << 26)) != 0)
+	value26 /= value;
+
+      if ((mask & (MASK_ONE << 27)) != 0)
+	value27 /= value;
+
+      if ((mask & (MASK_ONE << 28)) != 0)
+	value28 /= value;
+
+      if ((mask & (MASK_ONE << 29)) != 0)
+	value29 /= value;
+
+      if ((mask & (MASK_ONE << 30)) != 0)
+	value30 /= value;
+
+      if ((mask & (MASK_ONE << 31)) != 0)
+	value31 /= value;
+
+      if ((mask & (MASK_ONE << 32)) != 0)
+	value32 /= value;
+
+      if ((mask & (MASK_ONE << 33)) != 0)
+	value33 /= value;
+
+      if ((mask & (MASK_ONE << 34)) != 0)
+	value34 /= value;
+
+      if ((mask & (MASK_ONE << 35)) != 0)
+	value35 /= value;
+
+      if ((mask & (MASK_ONE << 36)) != 0)
+	value36 /= value;
+
+      if ((mask & (MASK_ONE << 37)) != 0)
+	value37 /= value;
+
+      if ((mask & (MASK_ONE << 38)) != 0)
+	value38 /= value;
+
+      if ((mask & (MASK_ONE << 39)) != 0)
+	value39 /= value;
+    }
+
+  while ((mask = *eq0_mask++) != 0)	/* Likewise; no operand list.  */
+    {
+      eq0 = 0;
+
+      if ((mask & (MASK_ONE <<  0)) != 0)
+	eq0 |= (value00 == ZERO);
+
+      if ((mask & (MASK_ONE <<  1)) != 0)
+	eq0 |= (value01 == ZERO);
+
+      if ((mask & (MASK_ONE <<  2)) != 0)
+	eq0 |= (value02 == ZERO);
+
+      if ((mask & (MASK_ONE <<  3)) != 0)
+	eq0 |= (value03 == ZERO);
+
+      if ((mask & (MASK_ONE <<  4)) != 0)
+	eq0 |= (value04 == ZERO);
+
+      if ((mask & (MASK_ONE <<  5)) != 0)
+	eq0 |= (value05 == ZERO);
+
+      if ((mask & (MASK_ONE <<  6)) != 0)
+	eq0 |= (value06 == ZERO);
+
+      if ((mask & (MASK_ONE <<  7)) != 0)
+	eq0 |= (value07 == ZERO);
+
+      if ((mask & (MASK_ONE <<  8)) != 0)
+	eq0 |= (value08 == ZERO);
+
+      if ((mask & (MASK_ONE <<  9)) != 0)
+	eq0 |= (value09 == ZERO);
+
+      if ((mask & (MASK_ONE << 10)) != 0)
+	eq0 |= (value10 == ZERO);
+
+      if ((mask & (MASK_ONE << 11)) != 0)
+	eq0 |= (value11 == ZERO);
+
+      if ((mask & (MASK_ONE << 12)) != 0)
+	eq0 |= (value12 == ZERO);
+
+      if ((mask & (MASK_ONE << 13)) != 0)
+	eq0 |= (value13 == ZERO);
+
+      if ((mask & (MASK_ONE << 14)) != 0)
+	eq0 |= (value14 == ZERO);
+
+      if ((mask & (MASK_ONE << 15)) != 0)
+	eq0 |= (value15 == ZERO);
+
+      if ((mask & (MASK_ONE << 16)) != 0)
+	eq0 |= (value16 == ZERO);
+
+      if ((mask & (MASK_ONE << 17)) != 0)
+	eq0 |= (value17 == ZERO);
+
+      if ((mask & (MASK_ONE << 18)) != 0)
+	eq0 |= (value18 == ZERO);
+
+      if ((mask & (MASK_ONE << 19)) != 0)
+	eq0 |= (value19 == ZERO);
+
+      if ((mask & (MASK_ONE << 20)) != 0)
+	eq0 |= (value20 == ZERO);
+
+      if ((mask & (MASK_ONE << 21)) != 0)
+	eq0 |= (value21 == ZERO);
+
+      if ((mask & (MASK_ONE << 22)) != 0)
+	eq0 |= (value22 == ZERO);
+
+      if ((mask & (MASK_ONE << 23)) != 0)
+	eq0 |= (value23 == ZERO);
+
+      if ((mask & (MASK_ONE << 24)) != 0)
+	eq0 |= (value24 == ZERO);
+
+      if ((mask & (MASK_ONE << 25)) != 0)
+	eq0 |= (value25 == ZERO);
+
+      if ((mask & (MASK_ONE << 26)) != 0)
+	eq0 |= (value26 == ZERO);
+
+      if ((mask & (MASK_ONE << 27)) != 0)
+	eq0 |= (value27 == ZERO);
+
+      if ((mask & (MASK_ONE << 28)) != 0)
+	eq0 |= (value28 == ZERO);
+
+      if ((mask & (MASK_ONE << 29)) != 0)
+	eq0 |= (value29 == ZERO);
+
+      if ((mask & (MASK_ONE << 30)) != 0)
+	eq0 |= (value30 == ZERO);
+
+      if ((mask & (MASK_ONE << 31)) != 0)
+	eq0 |= (value31 == ZERO);
+
+      if ((mask & (MASK_ONE << 32)) != 0)
+	eq0 |= (value32 == ZERO);
+
+      if ((mask & (MASK_ONE << 33)) != 0)
+	eq0 |= (value33 == ZERO);
+
+      if ((mask & (MASK_ONE << 34)) != 0)
+	eq0 |= (value34 == ZERO);
+
+      if ((mask & (MASK_ONE << 35)) != 0)
+	eq0 |= (value35 == ZERO);
+
+      if ((mask & (MASK_ONE << 36)) != 0)
+	eq0 |= (value36 == ZERO);
+
+      if ((mask & (MASK_ONE << 37)) != 0)
+	eq0 |= (value37 == ZERO);
+
+      if ((mask & (MASK_ONE << 38)) != 0)
+	eq0 |= (value38 == ZERO);
+
+      if ((mask & (MASK_ONE << 39)) != 0)
+	eq0 |= (value39 == ZERO);
+
+      *eq0_ptr++ = eq0;		/* One int stored per mask word.  */
+    }
+
+  return (  value00 + value01 + value02 + value03 + value04
+	  + value05 + value06 + value07 + value08 + value09
+	  + value10 + value11 + value12 + value13 + value14
+	  + value15 + value16 + value17 + value18 + value19
+	  + value20 + value21 + value22 + value23 + value24
+	  + value25 + value26 + value27 + value28 + value29
+	  + value30 + value31 + value32 + value33 + value34
+	  + value35 + value36 + value37 + value38 + value39);
+}
+
+/* { dg-final { scan-assembler "fadd"     } } */
+/* { dg-final { scan-assembler "fsub"     } } */
+/* { dg-final { scan-assembler "fmul"     } } */
+/* { dg-final { scan-assembler "fdiv"     } } */
+/* { dg-final { scan-assembler "fcmpu"    } } */
+/* { dg-final { scan-assembler "xsadddp"  } } */
+/* { dg-final { scan-assembler "xssubdp"  } } */
+/* { dg-final { scan-assembler "xsmuldp"  } } */
+/* { dg-final { scan-assembler "xsdivdp"  } } */
+/* { dg-final { scan-assembler "xscmpudp" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/upper-regs-sf.c b/gcc/testsuite/gcc.target/powerpc/upper-regs-sf.c
new file mode 100644
index 00000000000..401b5c16ffa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/upper-regs-sf.c
@@ -0,0 +1,726 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-sf" } */
+
+/* Test for the -mupper-regs-sf option to make sure float values are allocated
+   to the Altivec registers as well as the traditional FPR registers.  */
+
+#ifndef TYPE
+#define TYPE float		/* Used unless TYPE is already defined.  */
+#endif
+
+#ifndef MASK_TYPE
+#define MASK_TYPE unsigned long long /* Likewise for MASK_TYPE.  */
+#endif
+
+#define MASK_ONE	((MASK_TYPE)1)	/* Shifted left by 0..39 in test_add.  */
+#define ZERO		((TYPE) 0.0)	/* Initial value and comparison operand.  */
+
+TYPE
+test_add (const MASK_TYPE *add_mask, const TYPE *add_values,
+	  const MASK_TYPE *sub_mask, const TYPE *sub_values,
+	  const MASK_TYPE *mul_mask, const TYPE *mul_values,
+	  const MASK_TYPE *div_mask, const TYPE *div_values,
+	  const MASK_TYPE *eq0_mask, int *eq0_ptr)
+{
+  TYPE value;
+  TYPE value00	= ZERO;
+  TYPE value01	= ZERO;
+  TYPE value02	= ZERO;
+  TYPE value03	= ZERO;
+  TYPE value04	= ZERO;
+  TYPE value05	= ZERO;
+  TYPE value06	= ZERO;
+  TYPE value07	= ZERO;
+  TYPE value08	= ZERO;
+  TYPE value09	= ZERO;
+  TYPE value10	= ZERO;
+  TYPE value11	= ZERO;
+  TYPE value12	= ZERO;
+  TYPE value13	= ZERO;
+  TYPE value14	= ZERO;
+  TYPE value15	= ZERO;
+  TYPE value16	= ZERO;
+  TYPE value17	= ZERO;
+  TYPE value18	= ZERO;
+  TYPE value19	= ZERO;
+  TYPE value20	= ZERO;
+  TYPE value21	= ZERO;
+  TYPE value22	= ZERO;
+  TYPE value23	= ZERO;
+  TYPE value24	= ZERO;
+  TYPE value25	= ZERO;
+  TYPE value26	= ZERO;
+  TYPE value27	= ZERO;
+  TYPE value28	= ZERO;
+  TYPE value29	= ZERO;
+  TYPE value30	= ZERO;
+  TYPE value31	= ZERO;
+  TYPE value32	= ZERO;
+  TYPE value33	= ZERO;
+  TYPE value34	= ZERO;
+  TYPE value35	= ZERO;
+  TYPE value36	= ZERO;
+  TYPE value37	= ZERO;
+  TYPE value38	= ZERO;
+  TYPE value39	= ZERO;
+  MASK_TYPE mask;
+  int eq0;
+
+  while ((mask = *add_mask++) != 0)
+    {
+      value = *add_values++;
+
+      __asm__ (" #reg %0" : "+d" (value));
+
+      if ((mask & (MASK_ONE <<  0)) != 0)
+	value00 += value;
+
+      if ((mask & (MASK_ONE <<  1)) != 0)
+	value01 += value;
+
+      if ((mask & (MASK_ONE <<  2)) != 0)
+	value02 += value;
+
+      if ((mask & (MASK_ONE <<  3)) != 0)
+	value03 += value;
+
+      if ((mask & (MASK_ONE <<  4)) != 0)
+	value04 += value;
+
+      if ((mask & (MASK_ONE <<  5)) != 0)
+	value05 += value;
+
+      if ((mask & (MASK_ONE <<  6)) != 0)
+	value06 += value;
+
+      if ((mask & (MASK_ONE <<  7)) != 0)
+	value07 += value;
+
+      if ((mask & (MASK_ONE <<  8)) != 0)
+	value08 += value;
+
+      if ((mask & (MASK_ONE <<  9)) != 0)
+	value09 += value;
+
+      if ((mask & (MASK_ONE << 10)) != 0)
+	value10 += value;
+
+      if ((mask & (MASK_ONE << 11)) != 0)
+	value11 += value;
+
+      if ((mask & (MASK_ONE << 12)) != 0)
+	value12 += value;
+
+      if ((mask & (MASK_ONE << 13)) != 0)
+	value13 += value;
+
+      if ((mask & (MASK_ONE << 14)) != 0)
+	value14 += value;
+
+      if ((mask & (MASK_ONE << 15)) != 0)
+	value15 += value;
+
+      if ((mask & (MASK_ONE << 16)) != 0)
+	value16 += value;
+
+      if ((mask & (MASK_ONE << 17)) != 0)
+	value17 += value;
+
+      if ((mask & (MASK_ONE << 18)) != 0)
+	value18 += value;
+
+      if ((mask & (MASK_ONE << 19)) != 0)
+	value19 += value;
+
+      if ((mask & (MASK_ONE << 20)) != 0)
+	value20 += value;
+
+      if ((mask & (MASK_ONE << 21)) != 0)
+	value21 += value;
+
+      if ((mask & (MASK_ONE << 22)) != 0)
+	value22 += value;
+
+      if ((mask & (MASK_ONE << 23)) != 0)
+	value23 += value;
+
+      if ((mask & (MASK_ONE << 24)) != 0)
+	value24 += value;
+
+      if ((mask & (MASK_ONE << 25)) != 0)
+	value25 += value;
+
+      if ((mask & (MASK_ONE << 26)) != 0)
+	value26 += value;
+
+      if ((mask & (MASK_ONE << 27)) != 0)
+	value27 += value;
+
+      if ((mask & (MASK_ONE << 28)) != 0)
+	value28 += value;
+
+      if ((mask & (MASK_ONE << 29)) != 0)
+	value29 += value;
+
+      if ((mask & (MASK_ONE << 30)) != 0)
+	value30 += value;
+
+      if ((mask & (MASK_ONE << 31)) != 0)
+	value31 += value;
+
+      if ((mask & (MASK_ONE << 32)) != 0)
+	value32 += value;
+
+      if ((mask & (MASK_ONE << 33)) != 0)
+	value33 += value;
+
+      if ((mask & (MASK_ONE << 34)) != 0)
+	value34 += value;
+
+      if ((mask & (MASK_ONE << 35)) != 0)
+	value35 += value;
+
+      if ((mask & (MASK_ONE << 36)) != 0)
+	value36 += value;
+
+      if ((mask & (MASK_ONE << 37)) != 0)
+	value37 += value;
+
+      if ((mask & (MASK_ONE << 38)) != 0)
+	value38 += value;
+
+      if ((mask & (MASK_ONE << 39)) != 0)
+	value39 += value;
+    }
+
+  while ((mask = *sub_mask++) != 0)
+    {
+      value = *sub_values++;
+
+      __asm__ (" #reg %0" : "+d" (value));
+
+      if ((mask & (MASK_ONE <<  0)) != 0)
+	value00 -= value;
+
+      if ((mask & (MASK_ONE <<  1)) != 0)
+	value01 -= value;
+
+      if ((mask & (MASK_ONE <<  2)) != 0)
+	value02 -= value;
+
+      if ((mask & (MASK_ONE <<  3)) != 0)
+	value03 -= value;
+
+      if ((mask & (MASK_ONE <<  4)) != 0)
+	value04 -= value;
+
+      if ((mask & (MASK_ONE <<  5)) != 0)
+	value05 -= value;
+
+      if ((mask & (MASK_ONE <<  6)) != 0)
+	value06 -= value;
+
+      if ((mask & (MASK_ONE <<  7)) != 0)
+	value07 -= value;
+
+      if ((mask & (MASK_ONE <<  8)) != 0)
+	value08 -= value;
+
+      if ((mask & (MASK_ONE <<  9)) != 0)
+	value09 -= value;
+
+      if ((mask & (MASK_ONE << 10)) != 0)
+	value10 -= value;
+
+      if ((mask & (MASK_ONE << 11)) != 0)
+	value11 -= value;
+
+      if ((mask & (MASK_ONE << 12)) != 0)
+	value12 -= value;
+
+      if ((mask & (MASK_ONE << 13)) != 0)
+	value13 -= value;
+
+      if ((mask & (MASK_ONE << 14)) != 0)
+	value14 -= value;
+
+      if ((mask & (MASK_ONE << 15)) != 0)
+	value15 -= value;
+
+      if ((mask & (MASK_ONE << 16)) != 0)
+	value16 -= value;
+
+      if ((mask & (MASK_ONE << 17)) != 0)
+	value17 -= value;
+
+      if ((mask & (MASK_ONE << 18)) != 0)
+	value18 -= value;
+
+      if ((mask & (MASK_ONE << 19)) != 0)
+	value19 -= value;
+
+      if ((mask & (MASK_ONE << 20)) != 0)
+	value20 -= value;
+
+      if ((mask & (MASK_ONE << 21)) != 0)
+	value21 -= value;
+
+      if ((mask & (MASK_ONE << 22)) != 0)
+	value22 -= value;
+
+      if ((mask & (MASK_ONE << 23)) != 0)
+	value23 -= value;
+
+      if ((mask & (MASK_ONE << 24)) != 0)
+	value24 -= value;
+
+      if ((mask & (MASK_ONE << 25)) != 0)
+	value25 -= value;
+
+      if ((mask & (MASK_ONE << 26)) != 0)
+	value26 -= value;
+
+      if ((mask & (MASK_ONE << 27)) != 0)
+	value27 -= value;
+
+      if ((mask & (MASK_ONE << 28)) != 0)
+	value28 -= value;
+
+      if ((mask & (MASK_ONE << 29)) != 0)
+	value29 -= value;
+
+      if ((mask & (MASK_ONE << 30)) != 0)
+	value30 -= value;
+
+      if ((mask & (MASK_ONE << 31)) != 0)
+	value31 -= value;
+
+      if ((mask & (MASK_ONE << 32)) != 0)
+	value32 -= value;
+
+      if ((mask & (MASK_ONE << 33)) != 0)
+	value33 -= value;
+
+      if ((mask & (MASK_ONE << 34)) != 0)
+	value34 -= value;
+
+      if ((mask & (MASK_ONE << 35)) != 0)
+	value35 -= value;
+
+      if ((mask & (MASK_ONE << 36)) != 0)
+	value36 -= value;
+
+      if ((mask & (MASK_ONE << 37)) != 0)
+	value37 -= value;
+
+      if ((mask & (MASK_ONE << 38)) != 0)
+	value38 -= value;
+
+      if ((mask & (MASK_ONE << 39)) != 0)
+	value39 -= value;
+    }
+
+  while ((mask = *mul_mask++) != 0)
+    {
+      value = *mul_values++;
+
+      __asm__ (" #reg %0" : "+d" (value));
+
+      if ((mask & (MASK_ONE <<  0)) != 0)
+	value00 *= value;
+
+      if ((mask & (MASK_ONE <<  1)) != 0)
+	value01 *= value;
+
+      if ((mask & (MASK_ONE <<  2)) != 0)
+	value02 *= value;
+
+      if ((mask & (MASK_ONE <<  3)) != 0)
+	value03 *= value;
+
+      if ((mask & (MASK_ONE <<  4)) != 0)
+	value04 *= value;
+
+      if ((mask & (MASK_ONE <<  5)) != 0)
+	value05 *= value;
+
+      if ((mask & (MASK_ONE <<  6)) != 0)
+	value06 *= value;
+
+      if ((mask & (MASK_ONE <<  7)) != 0)
+	value07 *= value;
+
+      if ((mask & (MASK_ONE <<  8)) != 0)
+	value08 *= value;
+
+      if ((mask & (MASK_ONE <<  9)) != 0)
+	value09 *= value;
+
+      if ((mask & (MASK_ONE << 10)) != 0)
+	value10 *= value;
+
+      if ((mask & (MASK_ONE << 11)) != 0)
+	value11 *= value;
+
+      if ((mask & (MASK_ONE << 12)) != 0)
+	value12 *= value;
+
+      if ((mask & (MASK_ONE << 13)) != 0)
+	value13 *= value;
+
+      if ((mask & (MASK_ONE << 14)) != 0)
+	value14 *= value;
+
+      if ((mask & (MASK_ONE << 15)) != 0)
+	value15 *= value;
+
+      if ((mask & (MASK_ONE << 16)) != 0)
+	value16 *= value;
+
+      if ((mask & (MASK_ONE << 17)) != 0)
+	value17 *= value;
+
+      if ((mask & (MASK_ONE << 18)) != 0)
+	value18 *= value;
+
+      if ((mask & (MASK_ONE << 19)) != 0)
+	value19 *= value;
+
+      if ((mask & (MASK_ONE << 20)) != 0)
+	value20 *= value;
+
+      if ((mask & (MASK_ONE << 21)) != 0)
+	value21 *= value;
+
+      if ((mask & (MASK_ONE << 22)) != 0)
+	value22 *= value;
+
+      if ((mask & (MASK_ONE << 23)) != 0)
+	value23 *= value;
+
+      if ((mask & (MASK_ONE << 24)) != 0)
+	value24 *= value;
+
+      if ((mask & (MASK_ONE << 25)) != 0)
+	value25 *= value;
+
+      if ((mask & (MASK_ONE << 26)) != 0)
+	value26 *= value;
+
+      if ((mask & (MASK_ONE << 27)) != 0)
+	value27 *= value;
+
+      if ((mask & (MASK_ONE << 28)) != 0)
+	value28 *= value;
+
+      if ((mask & (MASK_ONE << 29)) != 0)
+	value29 *= value;
+
+      if ((mask & (MASK_ONE << 30)) != 0)
+	value30 *= value;
+
+      if ((mask & (MASK_ONE << 31)) != 0)
+	value31 *= value;
+
+      if ((mask & (MASK_ONE << 32)) != 0)
+	value32 *= value;
+
+      if ((mask & (MASK_ONE << 33)) != 0)
+	value33 *= value;
+
+      if ((mask & (MASK_ONE << 34)) != 0)
+	value34 *= value;
+
+      if ((mask & (MASK_ONE << 35)) != 0)
+	value35 *= value;
+
+      if ((mask & (MASK_ONE << 36)) != 0)
+	value36 *= value;
+
+      if ((mask & (MASK_ONE << 37)) != 0)
+	value37 *= value;
+
+      if ((mask & (MASK_ONE << 38)) != 0)
+	value38 *= value;
+
+      if ((mask & (MASK_ONE << 39)) != 0)
+	value39 *= value;
+    }
+
+  while ((mask = *div_mask++) != 0)
+    {
+      value = *div_values++;
+
+      __asm__ (" #reg %0" : "+d" (value));
+
+      if ((mask & (MASK_ONE <<  0)) != 0)
+	value00 /= value;
+
+      if ((mask & (MASK_ONE <<  1)) != 0)
+	value01 /= value;
+
+      if ((mask & (MASK_ONE <<  2)) != 0)
+	value02 /= value;
+
+      if ((mask & (MASK_ONE <<  3)) != 0)
+	value03 /= value;
+
+      if ((mask & (MASK_ONE <<  4)) != 0)
+	value04 /= value;
+
+      if ((mask & (MASK_ONE <<  5)) != 0)
+	value05 /= value;
+
+      if ((mask & (MASK_ONE <<  6)) != 0)
+	value06 /= value;
+
+      if ((mask & (MASK_ONE <<  7)) != 0)
+	value07 /= value;
+
+      if ((mask & (MASK_ONE <<  8)) != 0)
+	value08 /= value;
+
+      if ((mask & (MASK_ONE <<  9)) != 0)
+	value09 /= value;
+
+      if ((mask & (MASK_ONE << 10)) != 0)
+	value10 /= value;
+
+      if ((mask & (MASK_ONE << 11)) != 0)
+	value11 /= value;
+
+      if ((mask & (MASK_ONE << 12)) != 0)
+	value12 /= value;
+
+      if ((mask & (MASK_ONE << 13)) != 0)
+	value13 /= value;
+
+      if ((mask & (MASK_ONE << 14)) != 0)
+	value14 /= value;
+
+      if ((mask & (MASK_ONE << 15)) != 0)
+	value15 /= value;
+
+      if ((mask & (MASK_ONE << 16)) != 0)
+	value16 /= value;
+
+      if ((mask & (MASK_ONE << 17)) != 0)
+	value17 /= value;
+
+      if ((mask & (MASK_ONE << 18)) != 0)
+	value18 /= value;
+
+      if ((mask & (MASK_ONE << 19)) != 0)
+	value19 /= value;
+
+      if ((mask & (MASK_ONE << 20)) != 0)
+	value20 /= value;
+
+      if ((mask & (MASK_ONE << 21)) != 0)
+	value21 /= value;
+
+      if ((mask & (MASK_ONE << 22)) != 0)
+	value22 /= value;
+
+      if ((mask & (MASK_ONE << 23)) != 0)
+	value23 /= value;
+
+      if ((mask & (MASK_ONE << 24)) != 0)
+	value24 /= value;
+
+      if ((mask & (MASK_ONE << 25)) != 0)
+	value25 /= value;
+
+      if ((mask & (MASK_ONE << 26)) != 0)
+	value26 /= value;
+
+      if ((mask & (MASK_ONE << 27)) != 0)
+	value27 /= value;
+
+      if ((mask & (MASK_ONE << 28)) != 0)
+	value28 /= value;
+
+      if ((mask & (MASK_ONE << 29)) != 0)
+	value29 /= value;
+
+      if ((mask & (MASK_ONE << 30)) != 0)
+	value30 /= value;
+
+      if ((mask & (MASK_ONE << 31)) != 0)
+	value31 /= value;
+
+      if ((mask & (MASK_ONE << 32)) != 0)
+	value32 /= value;
+
+      if ((mask & (MASK_ONE << 33)) != 0)
+	value33 /= value;
+
+      if ((mask & (MASK_ONE << 34)) != 0)
+	value34 /= value;
+
+      if ((mask & (MASK_ONE << 35)) != 0)
+	value35 /= value;
+
+      if ((mask & (MASK_ONE << 36)) != 0)
+	value36 /= value;
+
+      if ((mask & (MASK_ONE << 37)) != 0)
+	value37 /= value;
+
+      if ((mask & (MASK_ONE << 38)) != 0)
+	value38 /= value;
+
+      if ((mask & (MASK_ONE << 39)) != 0)
+	value39 /= value;
+    }
+
+  while ((mask = *eq0_mask++) != 0)
+    {
+      eq0 = 0;
+
+      if ((mask & (MASK_ONE <<  0)) != 0)
+	eq0 |= (value00 == ZERO);
+
+      if ((mask & (MASK_ONE <<  1)) != 0)
+	eq0 |= (value01 == ZERO);
+
+      if ((mask & (MASK_ONE <<  2)) != 0)
+	eq0 |= (value02 == ZERO);
+
+      if ((mask & (MASK_ONE <<  3)) != 0)
+	eq0 |= (value03 == ZERO);
+
+      if ((mask & (MASK_ONE <<  4)) != 0)
+	eq0 |= (value04 == ZERO);
+
+      if ((mask & (MASK_ONE <<  5)) != 0)
+	eq0 |= (value05 == ZERO);
+
+      if ((mask & (MASK_ONE <<  6)) != 0)
+	eq0 |= (value06 == ZERO);
+
+      if ((mask & (MASK_ONE <<  7)) != 0)
+	eq0 |= (value07 == ZERO);
+
+      if ((mask & (MASK_ONE <<  8)) != 0)
+	eq0 |= (value08 == ZERO);
+
+      if ((mask & (MASK_ONE <<  9)) != 0)
+	eq0 |= (value09 == ZERO);
+
+      if ((mask & (MASK_ONE << 10)) != 0)
+	eq0 |= (value10 == ZERO);
+
+      if ((mask & (MASK_ONE << 11)) != 0)
+	eq0 |= (value11 == ZERO);
+
+      if ((mask & (MASK_ONE << 12)) != 0)
+	eq0 |= (value12 == ZERO);
+
+      if ((mask & (MASK_ONE << 13)) != 0)
+	eq0 |= (value13 == ZERO);
+
+      if ((mask & (MASK_ONE << 14)) != 0)
+	eq0 |= (value14 == ZERO);
+
+      if ((mask & (MASK_ONE << 15)) != 0)
+	eq0 |= (value15 == ZERO);
+
+      if ((mask & (MASK_ONE << 16)) != 0)
+	eq0 |= (value16 == ZERO);
+
+      if ((mask & (MASK_ONE << 17)) != 0)
+	eq0 |= (value17 == ZERO);
+
+      if ((mask & (MASK_ONE << 18)) != 0)
+	eq0 |= (value18 == ZERO);
+
+      if ((mask & (MASK_ONE << 19)) != 0)
+	eq0 |= (value19 == ZERO);
+
+      if ((mask & (MASK_ONE << 20)) != 0)
+	eq0 |= (value20 == ZERO);
+
+      if ((mask & (MASK_ONE << 21)) != 0)
+	eq0 |= (value21 == ZERO);
+
+      if ((mask & (MASK_ONE << 22)) != 0)
+	eq0 |= (value22 == ZERO);
+
+      if ((mask & (MASK_ONE << 23)) != 0)
+	eq0 |= (value23 == ZERO);
+
+      if ((mask & (MASK_ONE << 24)) != 0)
+	eq0 |= (value24 == ZERO);
+
+      if ((mask & (MASK_ONE << 25)) != 0)
+	eq0 |= (value25 == ZERO);
+
+      if ((mask & (MASK_ONE << 26)) != 0)
+	eq0 |= (value26 == ZERO);
+
+      if ((mask & (MASK_ONE << 27)) != 0)
+	eq0 |= (value27 == ZERO);
+
+      if ((mask & (MASK_ONE << 28)) != 0)
+	eq0 |= (value28 == ZERO);
+
+      if ((mask & (MASK_ONE << 29)) != 0)
+	eq0 |= (value29 == ZERO);
+
+      if ((mask & (MASK_ONE << 30)) != 0)
+	eq0 |= (value30 == ZERO);
+
+      if ((mask & (MASK_ONE << 31)) != 0)
+	eq0 |= (value31 == ZERO);
+
+      if ((mask & (MASK_ONE << 32)) != 0)
+	eq0 |= (value32 == ZERO);
+
+      if ((mask & (MASK_ONE << 33)) != 0)
+	eq0 |= (value33 == ZERO);
+
+      if ((mask & (MASK_ONE << 34)) != 0)
+	eq0 |= (value34 == ZERO);
+
+      if ((mask & (MASK_ONE << 35)) != 0)
+	eq0 |= (value35 == ZERO);
+
+      if ((mask & (MASK_ONE << 36)) != 0)
+	eq0 |= (value36 == ZERO);
+
+      if ((mask & (MASK_ONE << 37)) != 0)
+	eq0 |= (value37 == ZERO);
+
+      if ((mask & (MASK_ONE << 38)) != 0)
+	eq0 |= (value38 == ZERO);
+
+      if ((mask & (MASK_ONE << 39)) != 0)
+	eq0 |= (value39 == ZERO);
+
+      *eq0_ptr++ = eq0;
+    }
+
+  return (  value00 + value01 + value02 + value03 + value04
+	  + value05 + value06 + value07 + value08 + value09
+	  + value10 + value11 + value12 + value13 + value14
+	  + value15 + value16 + value17 + value18 + value19
+	  + value20 + value21 + value22 + value23 + value24
+	  + value25 + value26 + value27 + value28 + value29
+	  + value30 + value31 + value32 + value33 + value34
+	  + value35 + value36 + value37 + value38 + value39);
+}
+
+/* { dg-final { scan-assembler "fadds"    } } */
+/* { dg-final { scan-assembler "fsubs"    } } */
+/* { dg-final { scan-assembler "fmuls"    } } */
+/* { dg-final { scan-assembler "fdivs"    } } */
+/* { dg-final { scan-assembler "fcmpu"    } } */
+/* { dg-final { scan-assembler "xsaddsp"  } } */
+/* { dg-final { scan-assembler "xssubsp"  } } */
+/* { dg-final { scan-assembler "xsmulsp"  } } */
+/* { dg-final { scan-assembler "xsdivsp"  } } */
+/* { dg-final { scan-assembler "xscmpudp" } } */