Merge branches/gcc-5-branch rev 247822.

Change-Id: I6afae07bee582b09ce8b99c952f016bb1c8ac0f8
author: Yvan Roux <yvan.roux@linaro.org> 2017-05-10 11:40:20 +0200
committer: Yvan Roux <yvan.roux@linaro.org> 2017-05-12 10:45:17 +0000
commit: 52fe0bbb9b9dc80e1c616961bbded9a4578804c9 (patch)
tree: 888aa100137a4f9cf5b25a255ee53eeb5a61bfd8
parent: d40160202c70d757138d6a59189b7f40a10b3e15 (diff)
33 files changed, 1146 insertions, 57 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d93c79c28f2..583f500bbe8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,126 @@
+2017-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+	Backport from mainline
+	PR target/69868 + swap optimization backports
+	* config/rs6000/rs6000.c (swap_web_entry): Enlarge
+	special_handling bitfield.
+	(special_handling_values): Add SH_XXPERMDI, SH_CONCAT, SH_VPERM,
+	and SH_VPERM_COMP.
+	(const_load_sequence_p): New.
+	(load_comp_mask_p): New.
+	(v2df_reduction_p): New.
+	(rtx_is_swappable_p): Perform special handling for XXPERMDI and
+	for reductions.
+	(insn_is_swappable_p): Perform special handling for VEC_CONCAT,
+	V2DF reductions, and various permutes.
+	(adjust_xxpermdi): New.
+	(adjust_concat): New.
+	(find_swapped_load_and_const_vector): New.
+	(replace_const_vector_in_load): New.
+	(adjust_vperm): New.
+	(adjust_vperm_comp): New.
+	(handle_special_swappables): Call adjust_xxpermdi, adjust_concat,
+	adjust_vperm, and adjust_vperm_comp.
+	(replace_swap_with_copy): Allow vector NOT operations to also be
+	replaced by copies.
+	(dump_swap_insn_table): Handle new special handling values.
+
+2017-05-03  Uros Bizjak  <ubizjak@gmail.com>
+
+	Backport from mainline
+	2017-05-01  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR target/68491
+	* config/i386/cpuid.h (__get_cpuid): Always return 0 when
+	__get_cpuid_max returns 0.
+
+2017-04-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from mainline
+	2015-11-23  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	PR target/68390
+	* config/arm/arm.c (arm_function_ok_for_sibcall): Get function type
+	for indirect function call.
+
+2017-04-12  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+	Backport from mainline
+	2017-04-11  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+	PR target/80376
+	PR target/80315
+	* config/rs6000/rs6000.c (rs6000_expand_unop_builtin): Return
+	CONST0_RTX (mode) rather than const0_rtx where appropriate.
+	(rs6000_expand_binop_builtin): Likewise.
+	(rs6000_expand_ternop_builtin): Likewise; also add missing
+	vsx_xxpermdi_* variants; also fix typo (arg1 => arg2) for
+	vshasigma built-ins.
+	* doc/extend.texi: Document that vec_xxpermdi's third argument
+	must be a constant.
+
+2017-04-11  Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
+	Backport from GCC 6
+	2017-04-06  Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
+	PR target/80082
+	* config/arm/arm-protos.h (FL_LPAE): Define macro.
+	(FL_FOR_ARCH7VE): Add FL_LPAE.
+	(arm_arch_lpae): Declare extern.
+	* config/arm/arm.c (arm_arch_lpae): Declare.
+	(arm_option_override): Define arm_arch_lpae.
+	* config/arm/arm.h (TARGET_HAVE_LPAE): Redefine in terms of
+	arm_arch_lpae.
+
+2017-04-11  Martin Jambor  <mjambor@suse.cz>
+
+	Backport from mainline
+	2017-03-30  Martin Jambor  <mjambor@suse.cz>
+
+	PR ipa/77333
+	* cgraph.h (cgraph_build_function_type_skip_args): Declare.
+	* cgraph.c (redirect_call_stmt_to_callee): Set gimple fntype so that
+	it reflects the signature changes performed at the callee side.
+	* cgraphclones.c (build_function_type_skip_args): Make public, renamed
+	to cgraph_build_function_type_skip_args.
+	(build_function_decl_skip_args): Adjust call to the above function.
+
+2017-04-11  Bin Cheng  <bin.cheng@arm.com>
+
+	Backport from mainline
+	2016-02-10  Bin Cheng  <bin.cheng@arm.com>
+
+	PR tree-optimization/68021
+	* tree-ssa-loop-ivopts.c (get_computation_aff): Set ratio to 1
+	when computing the value of biv cand by itself.
+
+2017-04-08  Andreas Tobler  <andreast@gcc.gnu.org>
+
+	Backport from mainline
+	2017-04-08  Andreas Tobler  <andreast@gcc.gnu.org>
+
+	* config/aarch64/aarch64-freebsd.h: Define MCOUNT_NAME.
+	Add comment for WCHAR_TYPE.
+
+2017-04-07  Andreas Tobler  <andreast@gcc.gnu.org>
+
+	Backport from mainline
+	2017-04-07  Andreas Tobler  <andreast@gcc.gnu.org>
+
+	* config/aarch64/aarch64-freebsd.h: Define WCHAR_TYPE.
+
+2017-04-07  Uros Bizjak  <ubizjak@gmail.com>
+
+	Backport from mainline
+	2017-04-06  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR target/79733
+	* config/i386/i386.c (ix86_expand_builtin)
+	<case IX86_BUILTIN_KORTEST{C,Z}16>: Determine insn operand
+	mode from insn data. Convert operands to insn operand mode.
+	Copy operands that don't satisfy insn predicate to a register.
+
 2017-03-30  Peter Bergner  <bergner@vnet.ibm.com>
 
 	Backport from mainline
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 7c3b057f980..05d952cb71f 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20170404
+20170510
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index e25ecb3fb96..448e940586f 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1437,8 +1437,23 @@ cgraph_edge::redirect_call_stmt_to_callee (void)
       if (skip_bounds)
 	new_stmt = chkp_copy_call_skip_bounds (new_stmt);
 
+      tree old_fntype = gimple_call_fntype (e->call_stmt);
       gimple_call_set_fndecl (new_stmt, e->callee->decl);
-      gimple_call_set_fntype (new_stmt, gimple_call_fntype (e->call_stmt));
+      cgraph_node *origin = e->callee;
+      while (origin->clone_of)
+	origin = origin->clone_of;
+
+      if ((origin->former_clone_of
+	   && old_fntype == TREE_TYPE (origin->former_clone_of))
+	  || old_fntype == TREE_TYPE (origin->decl))
+	gimple_call_set_fntype (new_stmt, TREE_TYPE (e->callee->decl));
+      else
+	{
+	  bitmap skip = e->callee->clone.combined_args_to_skip;
+	  tree t = cgraph_build_function_type_skip_args (old_fntype, skip,
+							 false);
+	  gimple_call_set_fntype (new_stmt, t);
+	}
 
       if (gimple_vdef (new_stmt)
 	  && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME)
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index e3689968e96..104b3bbc6db 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -2238,6 +2238,8 @@ tree clone_function_name (tree decl, const char *);
 
 void tree_function_versioning (tree, tree, vec<ipa_replace_map *, va_gc> *,
 			       bool, bitmap, bool, bitmap, basic_block);
+tree cgraph_build_function_type_skip_args (tree orig_type, bitmap args_to_skip,
+					   bool skip_return);
 
 /* In cgraphbuild.c  */
 int compute_call_stmt_bb_frequency (tree, basic_block bb);
diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c
index 546cac86564..93668ab9981 100644
--- a/gcc/cgraphclones.c
+++ b/gcc/cgraphclones.c
@@ -191,9 +191,9 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid,
 /* Build variant of function type ORIG_TYPE skipping ARGS_TO_SKIP and the
    return value if SKIP_RETURN is true.  */
 
-static tree
-build_function_type_skip_args (tree orig_type, bitmap args_to_skip,
-			       bool skip_return)
+tree
+cgraph_build_function_type_skip_args (tree orig_type, bitmap args_to_skip,
+				      bool skip_return)
 {
   tree new_type = NULL;
   tree args, new_args = NULL;
@@ -258,7 +258,8 @@ build_function_decl_skip_args (tree orig_decl, bitmap args_to_skip,
   if (prototype_p (new_type)
       || (skip_return && !VOID_TYPE_P (TREE_TYPE (new_type))))
     new_type
-      = build_function_type_skip_args (new_type, args_to_skip, skip_return);
+      = cgraph_build_function_type_skip_args (new_type, args_to_skip,
+					      skip_return);
   TREE_TYPE (new_decl) = new_type;
 
   /* For declarations setting DECL_VINDEX (i.e. methods)
diff --git a/gcc/config/aarch64/aarch64-freebsd.h b/gcc/config/aarch64/aarch64-freebsd.h
index b9c1bfdc95f..bd47b3c4dfd 100644
--- a/gcc/config/aarch64/aarch64-freebsd.h
+++ b/gcc/config/aarch64/aarch64-freebsd.h
@@ -91,4 +91,12 @@
 #undef TARGET_BINDS_LOCAL_P
 #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
 
+/* Use the AAPCS type for wchar_t, override the one from
+   config/freebsd.h.  */
+#undef  WCHAR_TYPE
+#define WCHAR_TYPE  "unsigned int"
+
+#undef MCOUNT_NAME
+#define MCOUNT_NAME ".mcount"
+
 #endif  /* GCC_AARCH64_FREEBSD_H */
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 45f9d9bc2e3..6d363e66b7b 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -358,7 +358,7 @@ extern bool arm_is_constant_pool_ref (rtx);
 #define FL_STRONG     (1 << 8)	      /* StrongARM */
 #define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
 #define FL_XSCALE     (1 << 10)	      /* XScale */
-/* spare	      (1 << 11)	*/
+#define FL_LPAE       (1 << 11)       /* ARMv7-A LPAE.  */
 #define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
 					 media instructions.  */
 #define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
@@ -410,7 +410,7 @@ extern bool arm_is_constant_pool_ref (rtx);
 #define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
 #define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
 #define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
-#define FL_FOR_ARCH7VE	(FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
+#define FL_FOR_ARCH7VE	(FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV | FL_LPAE)
 #define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
 #define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
 #define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
@@ -606,6 +606,9 @@ extern int arm_arch_thumb2;
 extern int arm_arch_arm_hwdiv;
 extern int arm_arch_thumb_hwdiv;
 
+/* Nonzero if this chip supports the Large Physical Address Extension.  */
+extern int arm_arch_lpae;
+
 /* Nonzero if chip disallows volatile memory access in IT block.  */
 extern int arm_arch_no_volatile_ce;
 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1038ab11040..81341020928 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -890,6 +890,9 @@ int arm_arch_thumb2;
 int arm_arch_arm_hwdiv;
 int arm_arch_thumb_hwdiv;
 
+/* Nonzero if this chip supports the Large Physical Address Extension.  */
+int arm_arch_lpae;
+
 /* Nonzero if chip disallows volatile memory access in IT block.  */
 int arm_arch_no_volatile_ce;
 
@@ -3145,6 +3148,7 @@ arm_option_override (void)
   arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
   arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
   arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
+  arm_arch_lpae = ARM_FSET_HAS_CPU1 (insn_flags, FL_LPAE);
   arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
   arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
   arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index f60ec4f834e..f7d9d99285a 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -255,8 +255,7 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
 #define TARGET_HAVE_LDREX        ((arm_arch6 && TARGET_ARM) || arm_arch7)
 
 /* Nonzero if this chip supports LPAE.  */
-#define TARGET_HAVE_LPAE						\
-  (arm_arch7 && ARM_FSET_HAS_CPU1 (insn_flags, FL_FOR_ARCH7VE))
+#define TARGET_HAVE_LPAE	(arm_arch_lpae)
 
 /* Nonzero if this chip supports ldrex{bh} and strex{bh}.  */
 #define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7)
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 1ddc6e2e6a8..c7778d989f5 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -234,8 +234,9 @@ __get_cpuid (unsigned int __level,
 	     unsigned int *__ecx, unsigned int *__edx)
 {
   unsigned int __ext = __level & 0x80000000;
+  unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
 
-  if (__get_cpuid_max (__ext, 0) < __level)
+  if (__maxlevel == 0 || __maxlevel < __level)
     return 0;
 
   __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 18b136a5366..e852e099eab 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -39698,14 +39698,12 @@ rdseed_step:
 
     case IX86_BUILTIN_KORTESTC16:
       icode = CODE_FOR_kortestchi;
-      mode0 = HImode;
-      mode1 = CCCmode;
+      mode3 = CCCmode;
       goto kortest;
 
     case IX86_BUILTIN_KORTESTZ16:
       icode = CODE_FOR_kortestzhi;
-      mode0 = HImode;
-      mode1 = CCZmode;
+      mode3 = CCZmode;
 
     kortest:
       arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1.  */
@@ -39713,19 +39711,32 @@ rdseed_step:
       op0 = expand_normal (arg0);
       op1 = expand_normal (arg1);
 
-      op0 = copy_to_reg (op0);
-      op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
-      op1 = copy_to_reg (op1);
-      op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
+      mode0 = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+
+      if (GET_MODE (op0) != VOIDmode)
+	op0 = force_reg (GET_MODE (op0), op0);
+
+      op0 = gen_lowpart (mode0, op0);
+
+      if (!insn_data[icode].operand[0].predicate (op0, mode0))
+	op0 = copy_to_mode_reg (mode0, op0);
+
+      if (GET_MODE (op1) != VOIDmode)
+	op1 = force_reg (GET_MODE (op1), op1);
+
+      op1 = gen_lowpart (mode1, op1);
+
+      if (!insn_data[icode].operand[1].predicate (op1, mode1))
+	op1 = copy_to_mode_reg (mode1, op1);
 
       target = gen_reg_rtx (QImode);
-      emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
 
       /* Emit kortest.  */
       emit_insn (GEN_FCN (icode) (op0, op1));
       /* And use setcc to return result from flags.  */
       ix86_expand_setcc (target, EQ,
-			 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
+			 gen_rtx_REG (mode3, FLAGS_REG), const0_rtx);
       return target;
 
     case IX86_BUILTIN_GATHERSIV2DF:
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index e10f7edd32f..a3fec739104 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -12175,7 +12175,7 @@ rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
 	  || INTVAL (op0) < -16)
 	{
 	  error ("argument 1 must be a 5-bit signed literal");
-	  return const0_rtx;
+	  return CONST0_RTX (tmode);
 	}
     }
 
@@ -12278,7 +12278,7 @@ rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
 	  || TREE_INT_CST_LOW (arg1) & ~0x1f)
 	{
 	  error ("argument 2 must be a 5-bit unsigned literal");
-	  return const0_rtx;
+	  return CONST0_RTX (tmode);
 	}
     }
 
@@ -12957,13 +12957,18 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
 	  || TREE_INT_CST_LOW (arg2) & ~0xf)
 	{
 	  error ("argument 3 must be a 4-bit unsigned literal");
-	  return const0_rtx;
+	  return CONST0_RTX (tmode);
 	}
     }
   else if (icode == CODE_FOR_vsx_xxpermdi_v2df
            || icode == CODE_FOR_vsx_xxpermdi_v2di
            || icode == CODE_FOR_vsx_xxpermdi_v2df_be
            || icode == CODE_FOR_vsx_xxpermdi_v2di_be
+           || icode == CODE_FOR_vsx_xxpermdi_v1ti
+           || icode == CODE_FOR_vsx_xxpermdi_v4sf
+           || icode == CODE_FOR_vsx_xxpermdi_v4si
+           || icode == CODE_FOR_vsx_xxpermdi_v8hi
+           || icode == CODE_FOR_vsx_xxpermdi_v16qi
            || icode == CODE_FOR_vsx_xxsldwi_v16qi
            || icode == CODE_FOR_vsx_xxsldwi_v8hi
            || icode == CODE_FOR_vsx_xxsldwi_v4si
@@ -12977,7 +12982,7 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
 	  || TREE_INT_CST_LOW (arg2) & ~0x3)
 	{
 	  error ("argument 3 must be a 2-bit unsigned literal");
-	  return const0_rtx;
+	  return CONST0_RTX (tmode);
 	}
     }
   else if (icode == CODE_FOR_vsx_set_v2df
@@ -12997,7 +13002,7 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
 	  || TREE_INT_CST_LOW (arg2) & ~0x1)
 	{
 	  error ("argument 3 must be a 1-bit unsigned literal");
-	  return const0_rtx;
+	  return CONST0_RTX (tmode);
 	}
     }
   else if (icode == CODE_FOR_dfp_ddedpd_dd
@@ -13009,7 +13014,7 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
 	  || TREE_INT_CST_LOW (arg2) & ~0x3)
 	{
 	  error ("argument 1 must be 0 or 2");
-	  return const0_rtx;
+	  return CONST0_RTX (tmode);
 	}
     }
   else if (icode == CODE_FOR_dfp_denbcd_dd
@@ -13021,7 +13026,7 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
 	  || TREE_INT_CST_LOW (arg0) & ~0x1)
 	{
 	  error ("argument 1 must be a 1-bit unsigned literal");
-	  return const0_rtx;
+	  return CONST0_RTX (tmode);
 	}
     }
   else if (icode == CODE_FOR_dfp_dscli_dd
@@ -13035,7 +13040,7 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
 	  || TREE_INT_CST_LOW (arg1) & ~0x3f)
 	{
 	  error ("argument 2 must be a 6-bit unsigned literal");
-	  return const0_rtx;
+	  return CONST0_RTX (tmode);
 	}
     }
   else if (icode == CODE_FOR_crypto_vshasigmaw
@@ -13047,14 +13052,14 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
       if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
 	{
 	  error ("argument 2 must be 0 or 1");
-	  return const0_rtx;
+	  return CONST0_RTX (tmode);
 	}
 
       STRIP_NOPS (arg2);
-      if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg1, 16))
+      if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
 	{
 	  error ("argument 3 must be in the range 0..15");
-	  return const0_rtx;
+	  return CONST0_RTX (tmode);
 	}
     }
 
@@ -34153,10 +34158,8 @@ emit_fusion_gpr_load (rtx target, rtx mem)
    throughout the computation, we can get correct behavior by replacing
    M with M' as follows:
 
-            { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23]
-    M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31]
-            { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
-            { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
+    M'[i] = { (M[i]+8)%16      : M[i] in [0,15]
+            { ((M[i]+8)%16)+16 : M[i] in [16,31]
 
    This seems promising at first, since we are just replacing one mask
    with another.  But certain masks are preferable to others.  If M
@@ -34174,7 +34177,11 @@ emit_fusion_gpr_load (rtx target, rtx mem)
    mask to be produced by an UNSPEC_LVSL, in which case the mask 
    cannot be known at compile time.  In such a case we would have to
    generate several instructions to compute M' as above at run time,
-   and a cost model is needed again.  */
+   and a cost model is needed again.
+
+   However, when the mask M for an UNSPEC_VPERM is loaded from the
+   constant pool, we can replace M with M' as above at no cost
+   beyond adding a constant pool entry.  */
 
 /* This is based on the union-find logic in web.c.  web_entry_base is
    defined in df.h.  */
@@ -34210,7 +34217,7 @@ class swap_web_entry : public web_entry_base
   /* A nonzero value indicates what kind of special handling for this
      insn is required if doublewords are swapped.  Undefined if
      is_swappable is not set.  */
-  unsigned int special_handling : 3;
+  unsigned int special_handling : 4;
   /* Set if the web represented by this entry cannot be optimized.  */
   unsigned int web_not_optimizable : 1;
   /* Set if this insn should be deleted.  */
@@ -34224,7 +34231,11 @@ enum special_handling_values {
   SH_NOSWAP_LD,
   SH_NOSWAP_ST,
   SH_EXTRACT,
-  SH_SPLAT
+  SH_SPLAT,
+  SH_XXPERMDI,
+  SH_CONCAT,
+  SH_VPERM,
+  SH_VPERM_COMP
 };
 
 /* Union INSN with all insns containing definitions that reach USE.
@@ -34359,6 +34370,164 @@ insn_is_swap_p (rtx insn)
   return 1;
 }
 
+/* Return TRUE if insn is a swap fed by a load from the constant pool.  */
+static bool
+const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
+{
+  unsigned uid = INSN_UID (insn);
+  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
+    return false;
+
+  /* Find the unique use in the swap and locate its def.  If the def
+     isn't unique, punt.  */
+  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+  df_ref use;
+  FOR_EACH_INSN_INFO_USE (use, insn_info)
+    {
+      struct df_link *def_link = DF_REF_CHAIN (use);
+      if (!def_link || def_link->next)
+	return false;
+
+      rtx def_insn = DF_REF_INSN (def_link->ref);
+      unsigned uid2 = INSN_UID (def_insn);
+      if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
+	return false;
+
+      rtx body = PATTERN (def_insn);
+      if (GET_CODE (body) != SET
+	  || GET_CODE (SET_SRC (body)) != VEC_SELECT
+	  || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
+	return false;
+
+      rtx mem = XEXP (SET_SRC (body), 0);
+      rtx base_reg = XEXP (mem, 0);
+
+      if (!REG_P (base_reg))
+	{
+	  gcc_assert (GET_CODE (base_reg) == PLUS);
+	  base_reg = XEXP (base_reg, 0);
+	}
+
+      df_ref base_use;
+      rtx_insn *tocrel_insn = 0;
+      insn_info = DF_INSN_INFO_GET (def_insn);
+      FOR_EACH_INSN_INFO_USE (base_use, insn_info)
+	{
+	  if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
+	    continue;
+
+	  struct df_link *base_def_link = DF_REF_CHAIN (base_use);
+	  if (!base_def_link || base_def_link->next)
+	    return false;
+
+	  tocrel_insn = DF_REF_INSN (base_def_link->ref);
+	  rtx tocrel_body = PATTERN (tocrel_insn);
+	  rtx base, offset;
+	  if (GET_CODE (tocrel_body) != SET)
+	    return false;
+	  /* There is an extra level of indirection for small/large
+	     code models.  */
+	  rtx tocrel_expr = SET_SRC (tocrel_body);
+	  if (GET_CODE (tocrel_expr) == MEM)
+	    tocrel_expr = XEXP (tocrel_expr, 0);
+	  if (!toc_relative_expr_p (tocrel_expr, false))
+	    return false;
+	  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
+	  if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
+	    return false;
+	  rtx const_vector = get_pool_constant (base);
+	  /* With the extra indirection, get_pool_constant will produce the
+	     real constant from the reg_equal expression, so get the real
+	     constant.  It's still possible that the reg_equal doesn't
+	     represent a constant, so punt in that case.  */
+	  if (GET_CODE (const_vector) == SYMBOL_REF)
+	    {
+	      if (!CONSTANT_POOL_ADDRESS_P (const_vector))
+		return false;
+	      const_vector = get_pool_constant (const_vector);
+	    }
+	  if (GET_CODE (const_vector) != CONST_VECTOR)
+	    return false;
+	}
+      gcc_assert (tocrel_insn);
+    }
+  return true;
+}
+
+/* Return TRUE if INSN complements (VNOR of a register with itself) the
+   result of a swap fed by a load from the constant pool.  */
+static bool
+load_comp_mask_p (swap_web_entry *insn_entry, rtx insn)
+{
+  rtx body = PATTERN (insn);
+  if (GET_CODE (body) != SET)
+    return false;
+  rtx ior = SET_SRC (body);
+  if (GET_CODE (ior) != IOR)
+    return false;
+  rtx not1 = XEXP (ior, 0);
+  rtx not2 = XEXP (ior, 1);
+  if (GET_CODE (not1) != NOT || GET_CODE (not2) != NOT)
+    return false;
+  rtx reg1 = XEXP (not1, 0);
+  rtx reg2 = XEXP (not2, 0);
+  if (!REG_P (reg1) || !rtx_equal_p (reg1, reg2))
+    return false;
+
+  /* We have a VNOR operation.  Find the def of its source reg and
+     check for the remaining conditions.  */
+  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+  df_ref use;
+  FOR_EACH_INSN_INFO_USE (use, insn_info)
+    {
+      struct df_link *def_link = DF_REF_CHAIN (use);
+      if (!def_link || def_link->next)
+	return false;
+      rtx def_insn = DF_REF_INSN (def_link->ref);
+      return const_load_sequence_p (insn_entry, def_insn);
+    }
+
+  gcc_unreachable ();
+}
+
+/* Return TRUE iff OP matches a V2DF reduction pattern.  See the
+   definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md.  */
+static bool
+v2df_reduction_p (rtx op)
+{
+  if (GET_MODE (op) != V2DFmode)
+    return false;
+  
+  enum rtx_code code = GET_CODE (op);
+  if (code != PLUS && code != SMIN && code != SMAX)
+    return false;
+
+  rtx concat = XEXP (op, 0);
+  if (GET_CODE (concat) != VEC_CONCAT)
+    return false;
+
+  rtx select0 = XEXP (concat, 0);
+  rtx select1 = XEXP (concat, 1);
+  if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT)
+    return false;
+
+  rtx reg0 = XEXP (select0, 0);
+  rtx reg1 = XEXP (select1, 0);
+  if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0))
+    return false;
+
+  rtx parallel0 = XEXP (select0, 1);
+  rtx parallel1 = XEXP (select1, 1);
+  if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL)
+    return false;
+
+  if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx)
+      || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx))
+    return false;
+
+  return true;
+}
+
 /* Return 1 iff OP is an operand that will not be affected by having
    vector doublewords swapped in memory.  */
 static unsigned int
@@ -34416,6 +34585,22 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  *special = SH_EXTRACT;
 	  return 1;
 	}
+      /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
+	 XXPERMDI is a swap operation, it will be identified by
+	 insn_is_swap_p and therefore we won't get here.  */
+      else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
+	       && (GET_MODE (XEXP (op, 0)) == V4DFmode
+		   || GET_MODE (XEXP (op, 0)) == V4DImode)
+	       && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
+	       && XVECLEN (parallel, 0) == 2
+	       && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
+	       && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
+	{
+	  *special = SH_XXPERMDI;
+	  return 1;
+	}
+      else if (v2df_reduction_p (op))
+	return 1;
       else
 	return 0;
 
@@ -34480,6 +34665,9 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VSPLT_DIRECT:
 	    *special = SH_SPLAT;
 	    return 1;
+	  case UNSPEC_REDUC_PLUS:
+	  case UNSPEC_REDUC:
+	    return 1;
 	  }
       }
 
@@ -34593,6 +34781,59 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
       return 1;
     }
 
+  /* A concatenation of two doublewords is ok if we reverse the
+     order of the inputs.  */
+  if (GET_CODE (body) == SET
+      && GET_CODE (SET_SRC (body)) == VEC_CONCAT
+      && (GET_MODE (SET_SRC (body)) == V2DFmode
+	  || GET_MODE (SET_SRC (body)) == V2DImode))
+    {
+      *special = SH_CONCAT;
+      return 1;
+    }
+
+  /* V2DF reductions are always swappable.  */
+  if (GET_CODE (body) == PARALLEL)
+    {
+      rtx expr = XVECEXP (body, 0, 0);
+      if (GET_CODE (expr) == SET
+	  && v2df_reduction_p (SET_SRC (expr)))
+	return 1;
+    }
+
+  /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
+     constant pool, and optionally complemented afterwards.  */
+  if (GET_CODE (body) == SET
+      && GET_CODE (SET_SRC (body)) == UNSPEC
+      && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
+      && XVECLEN (SET_SRC (body), 0) == 3
+      && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
+    {
+      rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
+      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+      df_ref use;
+      FOR_EACH_INSN_INFO_USE (use, insn_info)
+	if (rtx_equal_p (DF_REF_REG (use), mask_reg))
+	  {
+	    struct df_link *def_link = DF_REF_CHAIN (use);
+	    /* Punt if multiple definitions for this reg.  */
+	    if (def_link && !def_link->next &&
+		const_load_sequence_p (insn_entry,
+				       DF_REF_INSN (def_link->ref)))
+	      {
+		*special = SH_VPERM;
+		return 1;
+	      }
+	    else if (def_link && !def_link->next &&
+		     load_comp_mask_p (insn_entry,
+				       DF_REF_INSN (def_link->ref)))
+	      {
+		*special = SH_VPERM_COMP;
+		return 1;
+	      }
+	  }
+    }
+
   /* Otherwise check the operands for vector lane violations.  */
   return rtx_is_swappable_p (body, special);
 }
@@ -34882,6 +35123,235 @@ adjust_splat (rtx_insn *insn)
     fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
 }
 
+/* Given INSN that contains an XXPERMDI operation (that is not a doubleword
+   swap), reverse the order of the source operands and adjust the indices
+   of the source lanes to account for doubleword reversal.  */
+static void
+adjust_xxpermdi (rtx_insn *insn)
+{
+  rtx set = PATTERN (insn);
+  rtx select = XEXP (set, 1);
+  rtx concat = XEXP (select, 0);
+  rtx src0 = XEXP (concat, 0);
+  XEXP (concat, 0) = XEXP (concat, 1);
+  XEXP (concat, 1) = src0;
+  rtx parallel = XEXP (select, 1);
+  int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
+  int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
+  int new_lane0 = 3 - lane1;
+  int new_lane1 = 3 - lane0;
+  XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
+  XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
+}
+
+/* Given INSN that contains a VEC_CONCAT operation of two doublewords,
+   reverse the order of those inputs.  */
+static void
+adjust_concat (rtx_insn *insn)
+{
+  rtx set = PATTERN (insn);
+  rtx concat = XEXP (set, 1);
+  rtx src0 = XEXP (concat, 0);
+  XEXP (concat, 0) = XEXP (concat, 1);
+  XEXP (concat, 1) = src0;
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
+}
+
+/* We previously determined that a use of MASK_REG in INSN was fed by a
+   swap of a swapping load of a TOC-relative constant pool symbol.  Return
+   the CONST_VECTOR that was loaded, as well as the LOAD_INSN (by
+   reference).  */
+static rtx
+find_swapped_load_and_const_vector (rtx_insn *insn, rtx_insn **load_insn,
+				    rtx mask_reg)
+{
+  /* Find the swap.  */
+  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+  df_ref use;
+  rtx_insn *swap_insn = 0;
+  FOR_EACH_INSN_INFO_USE (use, insn_info)
+    if (rtx_equal_p (DF_REF_REG (use), mask_reg))
+      {
+	struct df_link *def_link = DF_REF_CHAIN (use);
+	gcc_assert (def_link && !def_link->next);
+	swap_insn = DF_REF_INSN (def_link->ref);
+	break;
+      }
+  gcc_assert (swap_insn);
+  
+  /* Find the load.  */
+  insn_info = DF_INSN_INFO_GET (swap_insn);
+  FOR_EACH_INSN_INFO_USE (use, insn_info)
+    {
+      struct df_link *def_link = DF_REF_CHAIN (use);
+      gcc_assert (def_link && !def_link->next);
+      *load_insn = DF_REF_INSN (def_link->ref);
+      break;
+    }
+  gcc_assert (*load_insn);
+
+  /* Find the TOC-relative symbol access.  */
+  insn_info = DF_INSN_INFO_GET (*load_insn);
+  rtx_insn *tocrel_insn = 0;
+  FOR_EACH_INSN_INFO_USE (use, insn_info)
+    {
+      struct df_link *def_link = DF_REF_CHAIN (use);
+      gcc_assert (def_link && !def_link->next);
+      tocrel_insn = DF_REF_INSN (def_link->ref);
+      break;
+    }
+  gcc_assert (tocrel_insn);
+
+  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
+     to set tocrel_base; otherwise it would be unnecessary as we've
+     already established it will return true.  */
+  rtx base, offset;
+  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
+  /* There is an extra level of indirection for small/large code models.  */
+  if (GET_CODE (tocrel_expr) == MEM)
+    tocrel_expr = XEXP (tocrel_expr, 0);
+  if (!toc_relative_expr_p (tocrel_expr, false))
+    gcc_unreachable ();
+  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
+  rtx const_vector = get_pool_constant (base);
+  /* With the extra indirection, get_pool_constant will produce the
+     real constant from the reg_equal expression, so get the real
+     constant.  */
+  if (GET_CODE (const_vector) == SYMBOL_REF)
+    const_vector = get_pool_constant (const_vector);
+  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
+
+  return const_vector;
+}
+
+/* Build a V16QImode CONST_VECTOR from the 16 entries of NEW_MASK, and
+   replace the MEM in LOAD_INSN with a MEM referencing that vector.  */
+static void
+replace_const_vector_in_load (rtx_insn *load_insn, unsigned int *new_mask)
+{
+  unsigned int i;
+  rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
+  for (i = 0; i < 16; ++i)
+    XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
+  rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
+  rtx new_mem = force_const_mem (V16QImode, new_const_vector);
+  /* This can give us a MEM whose base operand is a SYMBOL_REF, which we
+     can't recognize.  If the base is not a REG, force it into one.  */
+  if (!REG_P (XEXP (new_mem, 0))) {
+    rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
+    XEXP (new_mem, 0) = base_reg;
+    /* Move the insn just emitted (the last insn) ahead of the load insn.  */
+    rtx_insn *force_insn = get_last_insn ();
+    remove_insn (force_insn);
+    rtx_insn *before_load_insn = PREV_INSN (load_insn);
+    add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
+    df_insn_rescan (before_load_insn);
+    df_insn_rescan (force_insn);
+  }
+
+  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
+  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (load_insn);
+}
+
+/* Given INSN, an UNSPEC_VPERM insn, modify the mask loaded from the
+   constant pool to reflect swapped doublewords.  */
+static void
+adjust_vperm (rtx_insn *insn)
+{
+  /* We previously determined that the UNSPEC_VPERM was fed by a
+     swap of a swapping load of a TOC-relative constant pool symbol.
+     Find the MEM in the swapping load and replace it with a MEM for
+     the adjusted mask constant.  */
+  rtx set = PATTERN (insn);
+  rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
+  rtx_insn *load_insn = 0;
+  rtx const_vector = find_swapped_load_and_const_vector (insn, &load_insn,
+							 mask_reg);
+
+  /* Add 8 modulo 16 to each selector; selectors >= 16 stay >= 16.  */
+  unsigned int new_mask[16], i, val;
+  for (i = 0; i < 16; ++i) {
+    val = INTVAL (XVECEXP (const_vector, 0, i));
+    if (val < 16)
+      new_mask[i] = (val + 8) % 16;
+    else
+      new_mask[i] = ((val + 8) % 16) + 16;
+  }
+
+  /* Update the load instruction to load the new constant vector.  */
+  replace_const_vector_in_load (load_insn, new_mask);
+
+  if (dump_file)
+    fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
+}
+
+/* Given INSN, an UNSPEC_VPERM fed by a complement (VNOR), fold both the
+   doubleword swap and the complement into the mask loaded from the
+   constant pool, and mark the VNOR in INSN_ENTRY as will_delete.  */
+static void
+adjust_vperm_comp (rtx_insn *insn, swap_web_entry *insn_entry)
+{
+  /* We previously determined that the UNSPEC_VPERM was fed by a
+     VNOR, itself fed by a swap of a swapping load of a TOC-relative
+     constant pool symbol.  Find the MEM in the swapping load and
+     replace it with a MEM for the adjusted mask constant.  */
+  rtx set = PATTERN (insn);
+  rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
+
+  /* Find the VNOR and mark it for removal.  */
+  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+  df_ref use;
+  rtx_insn *vnor_insn = 0;
+  FOR_EACH_INSN_INFO_USE (use, insn_info)
+    if (rtx_equal_p (DF_REF_REG (use), mask_reg))
+      {
+	struct df_link *def_link = DF_REF_CHAIN (use);
+	gcc_assert (def_link && !def_link->next);
+	vnor_insn = DF_REF_INSN (def_link->ref);
+	break;
+      }
+  gcc_assert (vnor_insn);
+
+  unsigned uid = INSN_UID (vnor_insn);
+  insn_entry[uid].will_delete = 1;
+
+  /* Identify the original mask register from the VNOR.  */
+  set = PATTERN (vnor_insn);
+  mask_reg = XEXP (XEXP (SET_SRC (set), 0), 0);
+
+  /* Find the load insn and the CONST_VECTOR that it loads.  */
+  rtx_insn *load_insn = 0;
+  rtx const_vector
+    = find_swapped_load_and_const_vector (vnor_insn, &load_insn, mask_reg);
+
+  /* Create the adjusted mask: 31 - selector applies the complement,
+     then adding 8 modulo 16 applies the doubleword swap.  */
+  unsigned int new_mask[16], i, val;
+  for (i = 0; i < 16; ++i) {
+    val = 31 - INTVAL (XVECEXP (const_vector, 0, i));
+    if (val < 16)
+      new_mask[i] = (val + 8) % 16;
+    else
+      new_mask[i] = ((val + 8) % 16) + 16;
+  }
+
+  /* Update the load instruction to load the new constant vector.  */
+  replace_const_vector_in_load (load_insn, new_mask);
+
+  if (dump_file)
+    fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
+}
+
 /* The insn described by INSN_ENTRY[I] can be swapped, but only
    with special handling.  Take care of that here.  */
 static void
@@ -34928,17 +35398,38 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
       /* Change the lane on a direct-splat operation.  */
       adjust_splat (insn);
       break;
+    case SH_XXPERMDI:
+      /* Change the lanes on an XXPERMDI operation.  */
+      adjust_xxpermdi (insn);
+      break;
+    case SH_CONCAT:
+      /* Reverse the order of a concatenation operation.  */
+      adjust_concat (insn);
+      break;
+    case SH_VPERM:
+      /* Change the mask loaded from the constant pool for a VPERM.  */
+      adjust_vperm (insn);
+      break;
+    case SH_VPERM_COMP:
+      /* Change the mask loaded from the constant pool and
+	 complemented for a vec_perm built-in.  */
+      adjust_vperm_comp (insn, insn_entry);
     }
 }
 
 /* Find the insn from the Ith table entry, which is known to be a
-   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
+   register swap Y = SWAP(X).  Replace it with a copy Y = X.
+   There is now one exception to this.  The table entry may also
+   refer to Y = VNOR(X, X).  */
 static void
 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
 {
   rtx_insn *insn = insn_entry[i].insn;
   rtx body = PATTERN (insn);
-  rtx src_reg = XEXP (SET_SRC (body), 0);
+  enum rtx_code code = GET_CODE (SET_SRC (body));
+  rtx src_reg = (code == IOR
+		 ? XEXP (XEXP (SET_SRC (body), 0), 0)
+		 : XEXP (SET_SRC (body), 0));
   rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
   rtx_insn *new_insn = emit_insn_before (copy, insn);
   set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
@@ -34947,7 +35438,10 @@ replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
   if (dump_file)
     {
       unsigned int new_uid = INSN_UID (new_insn);
-      fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
+      if (code == IOR)
+	fprintf (dump_file, "Replacing vnor %d with copy %d\n", i, new_uid);
+      else
+	fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
     }
 
   df_insn_delete (insn);
@@ -35000,6 +35494,14 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
 	      fputs ("special:extract ", dump_file);
 	    else if (insn_entry[i].special_handling == SH_SPLAT)
 	      fputs ("special:splat ", dump_file);
+	    else if (insn_entry[i].special_handling == SH_XXPERMDI)
+	      fputs ("special:xxpermdi ", dump_file);
+	    else if (insn_entry[i].special_handling == SH_CONCAT)
+	      fputs ("special:concat ", dump_file);
+	    else if (insn_entry[i].special_handling == SH_VPERM)
+	      fputs ("special:vperm ", dump_file);
+	    else if (insn_entry[i].special_handling == SH_VPERM_COMP)
+	      fputs ("special:vperm_c ", dump_file);
 	  }
 	if (insn_entry[i].web_not_optimizable)
 	  fputs ("unoptimizable ", dump_file);
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index b306bb7fd1c..61b2d163321 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -15321,20 +15321,21 @@ void vec_vsx_st (vector bool char, int, vector bool char *);
 void vec_vsx_st (vector bool char, int, unsigned char *);
 void vec_vsx_st (vector bool char, int, signed char *);
 
-vector double vec_xxpermdi (vector double, vector double, int);
-vector float vec_xxpermdi (vector float, vector float, int);
-vector long long vec_xxpermdi (vector long long, vector long long, int);
+vector double vec_xxpermdi (vector double, vector double, const int);
+vector float vec_xxpermdi (vector float, vector float, const int);
+vector long long vec_xxpermdi (vector long long, vector long long, const int);
 vector unsigned long long vec_xxpermdi (vector unsigned long long,
-                                        vector unsigned long long, int);
-vector int vec_xxpermdi (vector int, vector int, int);
+                                        vector unsigned long long, const int);
+vector int vec_xxpermdi (vector int, vector int, const int);
 vector unsigned int vec_xxpermdi (vector unsigned int,
-                                  vector unsigned int, int);
-vector short vec_xxpermdi (vector short, vector short, int);
+                                  vector unsigned int, const int);
+vector short vec_xxpermdi (vector short, vector short, const int);
 vector unsigned short vec_xxpermdi (vector unsigned short,
-                                    vector unsigned short, int);
-vector signed char vec_xxpermdi (vector signed char, vector signed char, int);
+                                    vector unsigned short, const int);
+vector signed char vec_xxpermdi (vector signed char, vector signed char,
+                                 const int);
 vector unsigned char vec_xxpermdi (vector unsigned char,
-                                   vector unsigned char, int);
+                                   vector unsigned char, const int);
 
 vector double vec_xxsldi (vector double, vector double, int);
 vector float vec_xxsldi (vector float, vector float, int);
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 9dc753b567c..b3f4403ba9c 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,28 @@
+2017-05-01  Janus Weil  <janus@gcc.gnu.org>
+
+	Backport from trunk
+	PR fortran/80392
+	* trans-types.c (gfc_get_derived_type): Prevent an infinite loop when
+	building a derived type that includes a procedure pointer component
+	with a polymorphic result.
+
+2017-04-21  Janus Weil  <janus@gcc.gnu.org>
+
+	Backport from trunk
+	PR fortran/80361
+	* class.c (generate_finalization_wrapper): Give the finalization wrapper
+	the recursive attribute.
+
+2017-04-14  Dominique d'Humieres  <dominiq@lps.ens.fr>
+
+	Backport from trunk
+	2015-11-18  Steven G. Kargl  <kargl@gcc.gnu.org>
+
+	PR fortran/59910
+	PR fortran/80388
+	* primary.c (gfc_match_structure_constructor): Reduce a structure
+	constructor in a DATA statement.
+
 2017-02-07  Steven G. Kargl  <kargl@gcc.gnu.org>
 
 	* trans-types.c	(gfc_get_int_kind_from_width_isofortranen):  Choose
diff --git a/gcc/fortran/class.c b/gcc/fortran/class.c
index 4ab96524b24..c8f67d53820 100644
--- a/gcc/fortran/class.c
+++ b/gcc/fortran/class.c
@@ -1599,6 +1599,7 @@ generate_finalization_wrapper (gfc_symbol *derived, gfc_namespace *ns,
   final->attr.flavor = FL_PROCEDURE;
   final->attr.function = 1;
   final->attr.pure = 0;
+  final->attr.recursive = 1;
   final->result = final;
   final->ts.type = BT_INTEGER;
   final->ts.kind = 4;
diff --git a/gcc/fortran/primary.c b/gcc/fortran/primary.c
index 44b9901cf67..d2fe8bc0eac 100644
--- a/gcc/fortran/primary.c
+++ b/gcc/fortran/primary.c
@@ -2657,6 +2657,12 @@ gfc_match_structure_constructor (gfc_symbol *sym, gfc_expr **result)
        return MATCH_ERROR;
      }
 
+  /* If a structure constructor is in a DATA statement, then each entity
+     in the structure constructor must be a constant.  Try to reduce the
+     expression here.  */
+  if (gfc_in_match_data ())
+    gfc_reduce_init_expr (e);
+
    *result = e;
    return MATCH_YES;
 }
diff --git a/gcc/fortran/trans-types.c b/gcc/fortran/trans-types.c
index 5d59ef52f78..153914e753f 100644
--- a/gcc/fortran/trans-types.c
+++ b/gcc/fortran/trans-types.c
@@ -2554,9 +2554,10 @@ gfc_get_derived_type (gfc_symbol * derived)
 	 the same as derived, by forcing the procedure pointer component to
 	 be built as if the explicit interface does not exist.  */
       if (c->attr.proc_pointer
-	  && ((c->ts.type != BT_DERIVED && c->ts.type != BT_CLASS)
-	       || (c->ts.u.derived
-		   && !gfc_compare_derived_types (derived, c->ts.u.derived))))
+	  && (c->ts.type != BT_DERIVED || (c->ts.u.derived
+		    && !gfc_compare_derived_types (derived, c->ts.u.derived)))
+	  && (c->ts.type != BT_CLASS || (CLASS_DATA (c)->ts.u.derived
+		    && !gfc_compare_derived_types (derived, CLASS_DATA (c)->ts.u.derived))))
 	field_type = gfc_get_ppc_type (c);
       else if (c->attr.proc_pointer && derived->backend_decl)
 	{
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 14932534994..554990f0ddb 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,76 @@
+2017-05-08  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+	Backport from mainline
+	PR target/69868 + swap optimization backports
+	* gcc.target/powerpc/swaps-p8-20.c: New.
+	* gcc.target/powerpc/swaps-p8-23.c: New.
+	* gcc.target/powerpc/swaps-p8-24.c: New.
+
+2017-05-01  Janus Weil  <janus@gcc.gnu.org>
+
+	Backport from trunk
+	PR fortran/80392
+	* gfortran.dg/proc_ptr_comp_49.f90: New test case.
+
+2017-04-21  Janus Weil  <janus@gcc.gnu.org>
+
+	Backport from trunk
+	PR fortran/80361
+	* gfortran.dg/class_62.f90: New test case.
+
+2017-04-21  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	Backport from mainline
+	2015-11-23  Kugan Vivekanandarajah  <kuganv@linaro.org>
+
+	PR target/68390
+	* gcc.c-torture/execute/pr68390.c: New test.
+
+2017-04-14  Dominique d'Humieres  <dominiq@lps.ens.fr>
+
+	Backport from trunk
+	2015-11-18  Steven G. Kargl  <kargl@gcc.gnu.org>
+
+	PR fortran/59910
+	PR fortran/80388
+	* gfortran.dg/pr59910.f90: New test.
+
+2017-04-11  Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
+	Backport from GCC 6
+	2017-04-06  Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
+	PR target/80082
+	* gcc.target/arm/atomic_loaddi_10.c: New testcase.
+	* gcc.target/arm/atomic_loaddi_11.c: Likewise.
+
+2017-04-11  Martin Jambor  <mjambor@suse.cz>
+
+	Backport from mainline
+	2017-03-30  Martin Jambor  <mjambor@suse.cz>
+
+	PR ipa/77333
+	* g++.dg/ipa/pr77333.C: New test.
+
+2017-04-11  Bin Cheng  <bin.cheng@arm.com>
+
+	PR tree-optimization/80345
+	* gcc.c-torture/compile/pr80345.c: New test.
+
+	Backport from mainline
+	2016-02-10  Bin Cheng  <bin.cheng@arm.com>
+
+	PR tree-optimization/68021
+	* gcc.dg/tree-ssa/pr68021.c: New test.
+
+2017-04-07  Uros Bizjak  <ubizjak@gmail.com>
+
+	Backport from mainline
+	2017-04-06  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR target/79733
+	* gcc.target/i386/pr79733.c: New test.
+
 2017-04-03  Peter Bergner  <bergner@vnet.ibm.com>
 
 	Backport from mainline
diff --git a/gcc/testsuite/g++.dg/ipa/pr77333.C b/gcc/testsuite/g++.dg/ipa/pr77333.C
new file mode 100644
index 00000000000..1ef997f7a54
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/pr77333.C
@@ -0,0 +1,65 @@
+// { dg-do run }
+// { dg-options "-O2 -fno-ipa-sra" }
+
+volatile int global;
+int __attribute__((noinline, noclone))
+get_data (int i)
+{
+  global = i;
+  return i;
+}
+
+typedef int array[32];
+
+namespace {
+
+char buf[512];
+
+class A
+{
+public:
+  int field;
+  char *s;
+
+  A() : field(223344)
+  {
+    s = buf;
+  }
+
+  int __attribute__((noinline))
+  foo (int a, int b, int c, int d, int e, int f, int g, int h, int i, int j,
+       int k, int l, int m, int n, int o, int p, int q, int r, int s, int t)
+  {
+    global = a+b+c+d+e+f+g+h+i+j+k+l+m+n+o+p+q+r+s+t;
+    return global;
+  }
+
+  int __attribute__((noinline))
+  bar()
+  {
+    int r = foo (get_data (1), get_data (1), get_data (1), get_data (1),
+		 get_data (1), get_data (1), get_data (1), get_data (1),
+		 get_data (1), get_data (1), get_data (1), get_data (1),
+		 get_data (1), get_data (1), get_data (1), get_data (1),
+		 get_data (1), get_data (1), get_data (1), get_data (1));
+
+    if (field != 223344)
+      __builtin_abort ();
+    return 0;
+  }
+};
+
+}
+
+int main (int argc, char **argv)
+{
+  A a;
+  int r = a.bar();
+  r = a.bar ();
+  if (a.field != 223344)
+      __builtin_abort ();
+  if (global != 20)
+    __builtin_abort ();
+
+  return r;
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr80345.c b/gcc/testsuite/gcc.c-torture/compile/pr80345.c
new file mode 100644
index 00000000000..9762f7c8877
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr80345.c
@@ -0,0 +1,17 @@
+/* PR tree-optimization/80345 */
+
+typedef long mp_limb_signed_t;
+void fn1(mp_limb_signed_t p1) {
+  int *a = (int *)1;
+  mp_limb_signed_t i, j;
+  i = 0;
+  for (; i < p1; i++) {
+    j = 0;
+    for (; j <= i; j++)
+      *a++ = 0;
+    j = i + 1;
+    for (; j < p1; j++)
+      a++;
+  }
+}
+void fn2() { fn1((mp_limb_signed_t)fn2); }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr68021.c b/gcc/testsuite/gcc.dg/tree-ssa/pr68021.c
new file mode 100644
index 00000000000..f60b1ff1ac3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr68021.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+char a;
+void fn1 (char *p1, int p2, int p3)
+{
+  int i, x;
+  for (i = 0; i < 10; i++)
+    {
+      for (x = 0; x < p3; x++)
+	{
+	  *p1 = a;
+	  p1--;
+	}
+      p1 += p2;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/arm/atomic_loaddi_10.c b/gcc/testsuite/gcc.target/arm/atomic_loaddi_10.c
new file mode 100644
index 00000000000..ecc3d06d0c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/atomic_loaddi_10.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v7ve_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_arch_v7ve } */
+
+#include <stdatomic.h>
+
+atomic_llong x = 0;
+
+atomic_llong get_x()
+{
+  return atomic_load(&x);
+}
+
+/* { dg-final { scan-assembler "ldrd" } } */
diff --git a/gcc/testsuite/gcc.target/arm/atomic_loaddi_11.c b/gcc/testsuite/gcc.target/arm/atomic_loaddi_11.c
new file mode 100644
index 00000000000..85c64ae68b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/atomic_loaddi_11.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v7r_ok } */
+/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { "-mcpu=cortex-r5" } } */
+/* { dg-options "-O2 -mcpu=cortex-r5" } */
+
+#include <stdatomic.h>
+
+atomic_llong x = 0;
+
+atomic_llong get_x()
+{
+  return atomic_load(&x);
+}
+
+/* { dg-final { scan-assembler-not "ldrd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr79733.c b/gcc/testsuite/gcc.target/i386/pr79733.c
new file mode 100644
index 00000000000..5caec911b01
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr79733.c
@@ -0,0 +1,23 @@
+/* PR target/79733 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+
+typedef unsigned short __mmask16;
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_kortestc (__mmask16 __A, __mmask16 __B)
+{
+  return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
+                                                (__mmask16) __B);
+}
+
+void
+avx512f_test ()
+{
+  volatile __mmask16 k1 = 0;
+  __mmask16 k2 = 0;
+  volatile short r;
+
+  r = _mm512_kortestc (k1, k2);
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-20.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-20.c
new file mode 100644
index 00000000000..7463781281e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-20.c
@@ -0,0 +1,29 @@
+/* { dg-do run { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O2 -mcpu=power8 -maltivec" } */
+
+/* The expansion for vector character multiply introduces a vperm operation.
+   This tests that the swap optimization to remove swaps by changing the
+   vperm mask results in correct code.  */
+
+#include <altivec.h>
+
+void abort ();
+
+vector unsigned char r;
+vector unsigned char v =
+  { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+vector unsigned char i =
+  { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+vector unsigned char e =
+  {0, 2, 6, 12, 20, 30, 42, 56, 72, 90, 110, 132, 156, 182, 210, 240};
+
+int main ()
+{
+  int j;
+  r = v * i;
+  if (!vec_all_eq (r, e))
+    abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-23.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-23.c
new file mode 100644
index 00000000000..a3f83ae26b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-23.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O3 -ffast-math" } */
+/* { dg-final { scan-assembler "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* Verify that swap optimization works correctly in the presence of
+   a V2DFmode reduction.  */
+
+extern double optvalue;
+extern void obfuscate (double, unsigned int);
+
+void
+foo (double *x, double *y, unsigned int n, unsigned int m)
+{
+  unsigned int i, j;
+  double sacc;
+  for (j = 0; j < m; ++j)
+    {
+      sacc = 0.0;
+      for (i = 0; i < n; ++i)
+	sacc += x[i] * y[i];
+      obfuscate (sacc, n);
+    }
+  optvalue = n * 2.0 * m;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-24.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-24.c
new file mode 100644
index 00000000000..528d6e6a68c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-24.c
@@ -0,0 +1,26 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O3 -ffast-math" } */
+/* { dg-final { scan-assembler "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+/* Verify that swap optimization works correctly in the presence of
+   a V4SFmode reduction.  */
+
+extern double optvalue;
+extern void obfuscate (float, unsigned int);
+
+void
+foo (float *x, float *y, unsigned int n, unsigned int m)
+{
+  unsigned int i, j;
+  float sacc;
+  for (j = 0; j < m; ++j)
+    {
+      sacc = 0.0f;
+      for (i = 0; i < n; ++i)
+	sacc += x[i] * y[i];
+      obfuscate (sacc, n);
+    }
+  optvalue = n * 2.0f * m;
+}
diff --git a/gcc/testsuite/gfortran.dg/class_62.f90 b/gcc/testsuite/gfortran.dg/class_62.f90
new file mode 100644
index 00000000000..39ee98dd361
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/class_62.f90
@@ -0,0 +1,29 @@
+! { dg-do run }
+! { dg-options "-fcheck=recursion" }
+!
+! PR 80361: [5/6/7 Regression] bogus recursive call to nonrecursive procedure with -fcheck=recursion
+!
+! Contributed by Jürgen Reuter <juergen.reuter@desy.de>
+
+program main_ut
+
+  implicit none
+
+  type :: prt_spec_expr_t
+  end type
+
+  type :: prt_expr_t
+     class(prt_spec_expr_t), allocatable :: x
+  end type
+
+  type, extends (prt_spec_expr_t) :: prt_spec_list_t
+     type(prt_expr_t) :: e
+  end type
+
+  class(prt_spec_list_t), allocatable :: y
+
+  allocate (y)
+  allocate (prt_spec_list_t :: y%e%x)
+  deallocate(y)
+
+end program
diff --git a/gcc/testsuite/gfortran.dg/pr59910.f90 b/gcc/testsuite/gfortran.dg/pr59910.f90
new file mode 100644
index 00000000000..2b288e4ff6c
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr59910.f90
@@ -0,0 +1,11 @@
+! { dg-do compile }
+! PR fortran/59910
+!
+program main
+  implicit none
+  type bar
+      integer :: limit(1)
+  end type
+  type (bar) :: testsuite
+  data testsuite / bar(reshape(source=[10],shape=[1])) /
+end
diff --git a/gcc/testsuite/gfortran.dg/proc_ptr_comp_49.f90 b/gcc/testsuite/gfortran.dg/proc_ptr_comp_49.f90
new file mode 100644
index 00000000000..e89791f728c
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/proc_ptr_comp_49.f90
@@ -0,0 +1,21 @@
+! { dg-do compile }
+!
+! PR 80392: [5/6/7 Regression] [OOP] ICE with allocatable polymorphic function result in a procedure pointer component
+!
+! Contributed by <zed.three@gmail.com>
+
+module mwe
+
+  implicit none
+
+  type :: MyType
+     procedure(my_op), nopass, pointer :: op
+  end type
+
+contains
+
+  function my_op() result(foo)
+    class(MyType), allocatable :: foo
+  end function
+
+end module
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index d5f33646343..29cca40bc0a 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -3400,7 +3400,18 @@ get_computation_aff (struct loop *loop,
       var = fold_convert (uutype, var);
     }
 
-  if (!constant_multiple_of (ustep, cstep, &rat))
+  /* Ratio is 1 when computing the value of biv cand by itself.
+     We can't rely on constant_multiple_of in this case because the
+     use is created after the original biv is selected.  The call
+     could fail because of inconsistent fold behavior.  See PR68021
+     for more information.  */
+  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
+    {
+      gcc_assert (is_gimple_assign (use->stmt));
+      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
+      rat = 1;
+    }
+  else if (!constant_multiple_of (ustep, cstep, &rat))
     return false;
 
   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 49c7bd8a2fa..4c7b8c2654d 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,8 @@
+2017-04-07  Alan Modra  <amodra@gmail.com>
+
+	PR target/45053
+	* config/rs6000/t-crtstuff (CRTSTUFF_T_CFLAGS): Add -O2.
+
 2017-01-09  Andreas Tobler  <andreast@gcc.gnu.org>
 
 	Backport from mainline
diff --git a/libgcc/config/rs6000/t-crtstuff b/libgcc/config/rs6000/t-crtstuff
index 7422d383754..0b2601b05bd 100644
--- a/libgcc/config/rs6000/t-crtstuff
+++ b/libgcc/config/rs6000/t-crtstuff
@@ -1,3 +1,6 @@
 # If .sdata is enabled __CTOR_{LIST,END}__ go into .sdata instead of
 # .ctors.
-CRTSTUFF_T_CFLAGS = -msdata=none
+# Do not build crtend.o with -Os as that can result in references to
+# out-of-line register save/restore functions, which may be unresolved
+# as crtend.o is linked after libgcc.a.  See PR45053.
+CRTSTUFF_T_CFLAGS = -msdata=none -O2
author	Yvan Roux <yvan.roux@linaro.org>	2017-05-10 11:40:20 +0200
committer	Yvan Roux <yvan.roux@linaro.org>	2017-05-12 10:45:17 +0000
commit	52fe0bbb9b9dc80e1c616961bbded9a4578804c9 (patch)
tree	888aa100137a4f9cf5b25a255ee53eeb5a61bfd8
parent	d40160202c70d757138d6a59189b7f40a10b3e15 (diff)