diff options
author | Yvan Roux <yvan.roux@linaro.org> | 2017-05-10 11:40:20 +0200 |
---|---|---|
committer | Yvan Roux <yvan.roux@linaro.org> | 2017-05-12 10:45:17 +0000 |
commit | 52fe0bbb9b9dc80e1c616961bbded9a4578804c9 (patch) | |
tree | 888aa100137a4f9cf5b25a255ee53eeb5a61bfd8 | |
parent | d40160202c70d757138d6a59189b7f40a10b3e15 (diff) |
Merge branches/gcc-5-branch rev 247822.
Change-Id: I6afae07bee582b09ce8b99c952f016bb1c8ac0f8
33 files changed, 1146 insertions, 57 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d93c79c28f2..583f500bbe8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,126 @@ +2016-05-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + Backport from mainline + PR target/69868 + swap optimization backports + * config/rs6000/rs6000.c (swap_web_entry): Enlarge + special_handling bitfield. + (special_handling_values): Add SH_XXPERMDI, SH_CONCAT, SH_VPERM, + and SH_VPERM_COMP. + (const_load_sequence_p): New. + (load_comp_mask_p): New. + (v2df_reduction_p): New. + (rtx_is_swappable_p): Perform special handling for XXPERMDI and + for reductions. + (insn_is_swappable_p): Perform special handling for VEC_CONCAT, + V2DF reductions, and various permutes. + (adjust_xxpermdi): New. + (adjust_concat): New. + (find_swapped_load_and_const_vector): New. + (replace_const_vector_in_load): New. + (adjust_vperm): New. + (adjust_vperm_comp): New. + (handle_special_swappables): Call adjust_xxpermdi, adjust_concat, + adjust_vperm, and adjust_vperm_comp. + (replace_swap_with_copy): Allow vector NOT operations to also be + replaced by copies. + (dump_swap_insn_table): Handle new special handling values. + +2017-05-03 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2017-05-01 Uros Bizjak <ubizjak@gmail.com> + + PR target/68491 + * config/i386/cpuid.h (__get_cpuid): Always return 0 when + __get_cpuid_max returns 0. + +2017-04-21 Christophe Lyon <christophe.lyon@linaro.org> + + Backport from mainline + +2015-11-23 Kugan Vivekanandarajah <kuganv@linaro.org> + + PR target/68390 + * config/arm/arm.c (arm_function_ok_for_sibcall): Get function type + for indirect function call. + +2017-04-12 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + Backport from mainline + 2017-04-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + PR target/80376 + PR target/80315 + * config/rs6000/rs6000.c (rs6000_expand_unop_builtin): Return + CONST0_RTX (mode) rather than const0_rtx where appropriate. + (rs6000_expand_binop_builtin): Likewise. 
+ (rs6000_expand_ternop_builtin): Likewise; also add missing + vsx_xxpermdi_* variants; also fix typo (arg1 => arg2) for + vshasigma built-ins. + * doc/extend.texi: Document that vec_xxpermdi's third argument + must be a constant. + +2017-04-11 Thomas Preud'homme <thomas.preudhomme@arm.com> + + Backport from GCC 6 + 2017-04-06 Thomas Preud'homme <thomas.preudhomme@arm.com> + + PR target/80082 + * config/arm/arm-protos.h (FL_LPAE): Define macro. + (FL_FOR_ARCH7VE): Add FL_LPAE. + (arm_arch_lpae): Declare extern. + * config/arm/arm.c (arm_arch_lpae): Declare. + (arm_option_override): Define arm_arch_lpae. + * config/arm/arm.h (TARGET_HAVE_LPAE): Redefine in term of + arm_arch_lpae. + +2017-04-11 Martin Jambor <mjambor@suse.cz> + + Backport from mainline + 2017-03-30 Martin Jambor <mjambor@suse.cz> + + PR ipa/77333 + * cgraph.h (cgraph_build_function_type_skip_args): Declare. + * cgraph.c (redirect_call_stmt_to_callee): Set gimple fntype so that + it reflects the signature changes performed at the callee side. + * cgraphclones.c (build_function_type_skip_args): Make public, renamed + to cgraph_build_function_type_skip_args. + (build_function_decl_skip_args): Adjust call to the above function. + +2017-04-11 Bin Cheng <bin.cheng@arm.com> + + Backport from mainline + 2016-02-10 Bin Cheng <bin.cheng@arm.com> + + PR tree-optimization/68021 + * tree-ssa-loop-ivopts.c (get_computation_aff): Set ratio to 1 if + when computing the value of biv cand by itself. + +2017-04-08 Andreas Tobler <andreast@gcc.gnu.org> + + Backport from mainline + 2017-04-08 Andreas Tobler <andreast@gcc.gnu.org> + + * config/aarch64/aarch64-freebsd.h: Define MCOUNT_NAME. + Add comment for WCHAR_T. + +2017-04-07 Andreas Tobler <andreast@gcc.gnu.org> + + Backport from mainline + 2017-04-07 Andreas Tobler <andreast@gcc.gnu.org> + + * config/aarch64/aarch64-freebsd.h: Define WCHAR_T. 
+ +2017-04-07 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2017-04-06 Uros Bizjak <ubizjak@gmail.com> + + PR target/79733 + * config/i386/i386.c (ix86_expand_builtin) + <case IX86_BUILTIN_KORTEST{C,Z}16>: Determine insn operand + mode from insn data. Convert operands to insn operand mode. + Copy operands that don't satisfy insn predicate to a register. + 2017-03-30 Peter Bergner <bergner@vnet.ibm.com> Backport from mainline diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 7c3b057f980..05d952cb71f 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20170404 +20170510 diff --git a/gcc/cgraph.c b/gcc/cgraph.c index e25ecb3fb96..448e940586f 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -1437,8 +1437,23 @@ cgraph_edge::redirect_call_stmt_to_callee (void) if (skip_bounds) new_stmt = chkp_copy_call_skip_bounds (new_stmt); + tree old_fntype = gimple_call_fntype (e->call_stmt); gimple_call_set_fndecl (new_stmt, e->callee->decl); - gimple_call_set_fntype (new_stmt, gimple_call_fntype (e->call_stmt)); + cgraph_node *origin = e->callee; + while (origin->clone_of) + origin = origin->clone_of; + + if ((origin->former_clone_of + && old_fntype == TREE_TYPE (origin->former_clone_of)) + || old_fntype == TREE_TYPE (origin->decl)) + gimple_call_set_fntype (new_stmt, TREE_TYPE (e->callee->decl)); + else + { + bitmap skip = e->callee->clone.combined_args_to_skip; + tree t = cgraph_build_function_type_skip_args (old_fntype, skip, + false); + gimple_call_set_fntype (new_stmt, t); + } if (gimple_vdef (new_stmt) && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) diff --git a/gcc/cgraph.h b/gcc/cgraph.h index e3689968e96..104b3bbc6db 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -2238,6 +2238,8 @@ tree clone_function_name (tree decl, const char *); void tree_function_versioning (tree, tree, vec<ipa_replace_map *, va_gc> *, bool, bitmap, bool, bitmap, basic_block); +tree cgraph_build_function_type_skip_args (tree orig_type, bitmap args_to_skip, + bool skip_return); 
/* In cgraphbuild.c */ int compute_call_stmt_bb_frequency (tree, basic_block bb); diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c index 546cac86564..93668ab9981 100644 --- a/gcc/cgraphclones.c +++ b/gcc/cgraphclones.c @@ -191,9 +191,9 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, /* Build variant of function type ORIG_TYPE skipping ARGS_TO_SKIP and the return value if SKIP_RETURN is true. */ -static tree -build_function_type_skip_args (tree orig_type, bitmap args_to_skip, - bool skip_return) +tree +cgraph_build_function_type_skip_args (tree orig_type, bitmap args_to_skip, + bool skip_return) { tree new_type = NULL; tree args, new_args = NULL; @@ -258,7 +258,8 @@ build_function_decl_skip_args (tree orig_decl, bitmap args_to_skip, if (prototype_p (new_type) || (skip_return && !VOID_TYPE_P (TREE_TYPE (new_type)))) new_type - = build_function_type_skip_args (new_type, args_to_skip, skip_return); + = cgraph_build_function_type_skip_args (new_type, args_to_skip, + skip_return); TREE_TYPE (new_decl) = new_type; /* For declarations setting DECL_VINDEX (i.e. methods) diff --git a/gcc/config/aarch64/aarch64-freebsd.h b/gcc/config/aarch64/aarch64-freebsd.h index b9c1bfdc95f..bd47b3c4dfd 100644 --- a/gcc/config/aarch64/aarch64-freebsd.h +++ b/gcc/config/aarch64/aarch64-freebsd.h @@ -91,4 +91,12 @@ #undef TARGET_BINDS_LOCAL_P #define TARGET_BINDS_LOCAL_P default_binds_local_p_2 +/* Use the AAPCS type for wchar_t, override the one from + config/freebsd.h. 
*/ +#undef WCHAR_TYPE +#define WCHAR_TYPE "unsigned int" + +#undef MCOUNT_NAME +#define MCOUNT_NAME ".mcount" + #endif /* GCC_AARCH64_FREEBSD_H */ diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 45f9d9bc2e3..6d363e66b7b 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -358,7 +358,7 @@ extern bool arm_is_constant_pool_ref (rtx); #define FL_STRONG (1 << 8) /* StrongARM */ #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */ #define FL_XSCALE (1 << 10) /* XScale */ -/* spare (1 << 11) */ +#define FL_LPAE (1 << 11) /* ARMv7-A LPAE. */ #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds media instructions. */ #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */ @@ -410,7 +410,7 @@ extern bool arm_is_constant_pool_ref (rtx); #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) -#define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV) +#define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV | FL_LPAE) #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV) #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV) #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) @@ -606,6 +606,9 @@ extern int arm_arch_thumb2; extern int arm_arch_arm_hwdiv; extern int arm_arch_thumb_hwdiv; +/* Nonzero if this chip supports the Large Physical Address Extension. */ +extern int arm_arch_lpae; + /* Nonzero if chip disallows volatile memory access in IT block. */ extern int arm_arch_no_volatile_ce; diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 1038ab11040..81341020928 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -890,6 +890,9 @@ int arm_arch_thumb2; int arm_arch_arm_hwdiv; int arm_arch_thumb_hwdiv; +/* Nonzero if this chip supports the Large Physical Address Extension. */ +int arm_arch_lpae; + /* Nonzero if chip disallows volatile memory access in IT block. 
*/ int arm_arch_no_volatile_ce; @@ -3145,6 +3148,7 @@ arm_option_override (void) arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2); arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV); arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV); + arm_arch_lpae = ARM_FSET_HAS_CPU1 (insn_flags, FL_LPAE); arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE); arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32); diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index f60ec4f834e..f7d9d99285a 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -255,8 +255,7 @@ extern void (*arm_lang_output_object_attributes_hook)(void); #define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) /* Nonzero if this chip supports LPAE. */ -#define TARGET_HAVE_LPAE \ - (arm_arch7 && ARM_FSET_HAS_CPU1 (insn_flags, FL_FOR_ARCH7VE)) +#define TARGET_HAVE_LPAE (arm_arch_lpae) /* Nonzero if this chip supports ldrex{bh} and strex{bh}. 
*/ #define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 1ddc6e2e6a8..c7778d989f5 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -234,8 +234,9 @@ __get_cpuid (unsigned int __level, unsigned int *__ecx, unsigned int *__edx) { unsigned int __ext = __level & 0x80000000; + unsigned int __maxlevel = __get_cpuid_max (__ext, 0); - if (__get_cpuid_max (__ext, 0) < __level) + if (__maxlevel == 0 || __maxlevel < __level) return 0; __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 18b136a5366..e852e099eab 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -39698,14 +39698,12 @@ rdseed_step: case IX86_BUILTIN_KORTESTC16: icode = CODE_FOR_kortestchi; - mode0 = HImode; - mode1 = CCCmode; + mode3 = CCCmode; goto kortest; case IX86_BUILTIN_KORTESTZ16: icode = CODE_FOR_kortestzhi; - mode0 = HImode; - mode1 = CCZmode; + mode3 = CCZmode; kortest: arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */ @@ -39713,19 +39711,32 @@ rdseed_step: op0 = expand_normal (arg0); op1 = expand_normal (arg1); - op0 = copy_to_reg (op0); - op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0); - op1 = copy_to_reg (op1); - op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0); + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + if (GET_MODE (op0) != VOIDmode) + op0 = force_reg (GET_MODE (op0), op0); + + op0 = gen_lowpart (mode0, op0); + + if (!insn_data[icode].operand[0].predicate (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + + if (GET_MODE (op1) != VOIDmode) + op1 = force_reg (GET_MODE (op1), op1); + + op1 = gen_lowpart (mode1, op1); + + if (!insn_data[icode].operand[1].predicate (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); target = gen_reg_rtx (QImode); - emit_insn (gen_rtx_SET (mode0, target, const0_rtx)); /* Emit kortest. 
*/ emit_insn (GEN_FCN (icode) (op0, op1)); /* And use setcc to return result from flags. */ ix86_expand_setcc (target, EQ, - gen_rtx_REG (mode1, FLAGS_REG), const0_rtx); + gen_rtx_REG (mode3, FLAGS_REG), const0_rtx); return target; case IX86_BUILTIN_GATHERSIV2DF: diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index e10f7edd32f..a3fec739104 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -12175,7 +12175,7 @@ rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target) || INTVAL (op0) < -16) { error ("argument 1 must be a 5-bit signed literal"); - return const0_rtx; + return CONST0_RTX (tmode); } } @@ -12278,7 +12278,7 @@ rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) || TREE_INT_CST_LOW (arg1) & ~0x1f) { error ("argument 2 must be a 5-bit unsigned literal"); - return const0_rtx; + return CONST0_RTX (tmode); } } @@ -12957,13 +12957,18 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) || TREE_INT_CST_LOW (arg2) & ~0xf) { error ("argument 3 must be a 4-bit unsigned literal"); - return const0_rtx; + return CONST0_RTX (tmode); } } else if (icode == CODE_FOR_vsx_xxpermdi_v2df || icode == CODE_FOR_vsx_xxpermdi_v2di || icode == CODE_FOR_vsx_xxpermdi_v2df_be || icode == CODE_FOR_vsx_xxpermdi_v2di_be + || icode == CODE_FOR_vsx_xxpermdi_v1ti + || icode == CODE_FOR_vsx_xxpermdi_v4sf + || icode == CODE_FOR_vsx_xxpermdi_v4si + || icode == CODE_FOR_vsx_xxpermdi_v8hi + || icode == CODE_FOR_vsx_xxpermdi_v16qi || icode == CODE_FOR_vsx_xxsldwi_v16qi || icode == CODE_FOR_vsx_xxsldwi_v8hi || icode == CODE_FOR_vsx_xxsldwi_v4si @@ -12977,7 +12982,7 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) || TREE_INT_CST_LOW (arg2) & ~0x3) { error ("argument 3 must be a 2-bit unsigned literal"); - return const0_rtx; + return CONST0_RTX (tmode); } } else if (icode == CODE_FOR_vsx_set_v2df @@ -12997,7 +13002,7 @@ rs6000_expand_ternop_builtin (enum insn_code 
icode, tree exp, rtx target) || TREE_INT_CST_LOW (arg2) & ~0x1) { error ("argument 3 must be a 1-bit unsigned literal"); - return const0_rtx; + return CONST0_RTX (tmode); } } else if (icode == CODE_FOR_dfp_ddedpd_dd @@ -13009,7 +13014,7 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) || TREE_INT_CST_LOW (arg2) & ~0x3) { error ("argument 1 must be 0 or 2"); - return const0_rtx; + return CONST0_RTX (tmode); } } else if (icode == CODE_FOR_dfp_denbcd_dd @@ -13021,7 +13026,7 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) || TREE_INT_CST_LOW (arg0) & ~0x1) { error ("argument 1 must be a 1-bit unsigned literal"); - return const0_rtx; + return CONST0_RTX (tmode); } } else if (icode == CODE_FOR_dfp_dscli_dd @@ -13035,7 +13040,7 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) || TREE_INT_CST_LOW (arg1) & ~0x3f) { error ("argument 2 must be a 6-bit unsigned literal"); - return const0_rtx; + return CONST0_RTX (tmode); } } else if (icode == CODE_FOR_crypto_vshasigmaw @@ -13047,14 +13052,14 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2)) { error ("argument 2 must be 0 or 1"); - return const0_rtx; + return CONST0_RTX (tmode); } STRIP_NOPS (arg2); - if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg1, 16)) + if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16)) { error ("argument 3 must be in the range 0..15"); - return const0_rtx; + return CONST0_RTX (tmode); } } @@ -34153,10 +34158,8 @@ emit_fusion_gpr_load (rtx target, rtx mem) throughout the computation, we can get correct behavior by replacing M with M' as follows: - { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23] - M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31] - { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23] - { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31] + M'[i] = { (M[i]+8)%16 : M[i] in [0,15] + { ((M[i]+8)%16)+16 : M[i] in 
[16,31] This seems promising at first, since we are just replacing one mask with another. But certain masks are preferable to others. If M @@ -34174,7 +34177,11 @@ emit_fusion_gpr_load (rtx target, rtx mem) mask to be produced by an UNSPEC_LVSL, in which case the mask cannot be known at compile time. In such a case we would have to generate several instructions to compute M' as above at run time, - and a cost model is needed again. */ + and a cost model is needed again. + + However, when the mask M for an UNSPEC_VPERM is loaded from the + constant pool, we can replace M with M' as above at no cost + beyond adding a constant pool entry. */ /* This is based on the union-find logic in web.c. web_entry_base is defined in df.h. */ @@ -34210,7 +34217,7 @@ class swap_web_entry : public web_entry_base /* A nonzero value indicates what kind of special handling for this insn is required if doublewords are swapped. Undefined if is_swappable is not set. */ - unsigned int special_handling : 3; + unsigned int special_handling : 4; /* Set if the web represented by this entry cannot be optimized. */ unsigned int web_not_optimizable : 1; /* Set if this insn should be deleted. */ @@ -34224,7 +34231,11 @@ enum special_handling_values { SH_NOSWAP_LD, SH_NOSWAP_ST, SH_EXTRACT, - SH_SPLAT + SH_SPLAT, + SH_XXPERMDI, + SH_CONCAT, + SH_VPERM, + SH_VPERM_COMP }; /* Union INSN with all insns containing definitions that reach USE. @@ -34359,6 +34370,164 @@ insn_is_swap_p (rtx insn) return 1; } +/* Return TRUE if insn is a swap fed by a load from the constant pool. */ +static bool +const_load_sequence_p (swap_web_entry *insn_entry, rtx insn) +{ + unsigned uid = INSN_UID (insn); + if (!insn_entry[uid].is_swap || insn_entry[uid].is_load) + return false; + + /* Find the unique use in the swap and locate its def. If the def + isn't unique, punt. 
*/ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + if (!def_link || def_link->next) + return false; + + rtx def_insn = DF_REF_INSN (def_link->ref); + unsigned uid2 = INSN_UID (def_insn); + if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap) + return false; + + rtx body = PATTERN (def_insn); + if (GET_CODE (body) != SET + || GET_CODE (SET_SRC (body)) != VEC_SELECT + || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM) + return false; + + rtx mem = XEXP (SET_SRC (body), 0); + rtx base_reg = XEXP (mem, 0); + + if (!REG_P (base_reg)) + { + gcc_assert (GET_CODE (base_reg) == PLUS); + base_reg = XEXP (base_reg, 0); + } + + df_ref base_use; + rtx_insn *tocrel_insn = 0; + insn_info = DF_INSN_INFO_GET (def_insn); + FOR_EACH_INSN_INFO_USE (base_use, insn_info) + { + if (!rtx_equal_p (DF_REF_REG (base_use), base_reg)) + continue; + + struct df_link *base_def_link = DF_REF_CHAIN (base_use); + if (!base_def_link || base_def_link->next) + return false; + + tocrel_insn = DF_REF_INSN (base_def_link->ref); + rtx tocrel_body = PATTERN (tocrel_insn); + rtx base, offset; + if (GET_CODE (tocrel_body) != SET) + return false; + /* There is an extra level of indirection for small/large + code models. */ + rtx tocrel_expr = SET_SRC (tocrel_body); + if (GET_CODE (tocrel_expr) == MEM) + tocrel_expr = XEXP (tocrel_expr, 0); + if (!toc_relative_expr_p (tocrel_expr, false)) + return false; + split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset); + if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base)) + return false; + rtx const_vector = get_pool_constant (base); + /* With the extra indirection, get_pool_constant will produce the + real constant from the reg_equal expression, so get the real + constant. It's still possible that the reg_equal doesn't + represent a constant, so punt in that case. 
*/ + if (GET_CODE (const_vector) == SYMBOL_REF) + { + if (!CONSTANT_POOL_ADDRESS_P (const_vector)) + return false; + const_vector = get_pool_constant (const_vector); + } + if (GET_CODE (const_vector) != CONST_VECTOR) + return false; + } + gcc_assert (tocrel_insn); + } + return true; +} + +/* Return TRUE if insn is a swap fed by a load from the constant pool + and subsequently complemented. */ +static bool +load_comp_mask_p (swap_web_entry *insn_entry, rtx insn) +{ + rtx body = PATTERN (insn); + if (GET_CODE (body) != SET) + return false; + rtx ior = SET_SRC (body); + if (GET_CODE (ior) != IOR) + return false; + rtx not1 = XEXP (ior, 0); + rtx not2 = XEXP (ior, 1); + if (GET_CODE (not1) != NOT || GET_CODE (not2) != NOT) + return false; + rtx reg1 = XEXP (not1, 0); + rtx reg2 = XEXP (not2, 0); + if (!REG_P (reg1) || !rtx_equal_p (reg1, reg2)) + return false; + + /* We have a VNOR operation. Find the def of its source reg and + check for the remaining conditions. */ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + if (!def_link || def_link->next) + return false; + rtx def_insn = DF_REF_INSN (def_link->ref); + return const_load_sequence_p (insn_entry, def_insn); + } + + gcc_unreachable (); +} + +/* Return TRUE iff OP matches a V2DF reduction pattern. See the + definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. 
*/ +static bool +v2df_reduction_p (rtx op) +{ + if (GET_MODE (op) != V2DFmode) + return false; + + enum rtx_code code = GET_CODE (op); + if (code != PLUS && code != SMIN && code != SMAX) + return false; + + rtx concat = XEXP (op, 0); + if (GET_CODE (concat) != VEC_CONCAT) + return false; + + rtx select0 = XEXP (concat, 0); + rtx select1 = XEXP (concat, 1); + if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT) + return false; + + rtx reg0 = XEXP (select0, 0); + rtx reg1 = XEXP (select1, 0); + if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0)) + return false; + + rtx parallel0 = XEXP (select0, 1); + rtx parallel1 = XEXP (select1, 1); + if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL) + return false; + + if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx) + || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx)) + return false; + + return true; +} + /* Return 1 iff OP is an operand that will not be affected by having vector doublewords swapped in memory. */ static unsigned int @@ -34416,6 +34585,22 @@ rtx_is_swappable_p (rtx op, unsigned int *special) *special = SH_EXTRACT; return 1; } + /* An XXPERMDI is ok if we adjust the lanes. Note that if the + XXPERMDI is a swap operation, it will be identified by + insn_is_swap_p and therefore we won't get here. 
*/ + else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT + && (GET_MODE (XEXP (op, 0)) == V4DFmode + || GET_MODE (XEXP (op, 0)) == V4DImode) + && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL + && XVECLEN (parallel, 0) == 2 + && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT + && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT) + { + *special = SH_XXPERMDI; + return 1; + } + else if (v2df_reduction_p (op)) + return 1; else return 0; @@ -34480,6 +34665,9 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_VSPLT_DIRECT: *special = SH_SPLAT; return 1; + case UNSPEC_REDUC_PLUS: + case UNSPEC_REDUC: + return 1; } } @@ -34593,6 +34781,59 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, return 1; } + /* A concatenation of two doublewords is ok if we reverse the + order of the inputs. */ + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == VEC_CONCAT + && (GET_MODE (SET_SRC (body)) == V2DFmode + || GET_MODE (SET_SRC (body)) == V2DImode)) + { + *special = SH_CONCAT; + return 1; + } + + /* V2DF reductions are always swappable. */ + if (GET_CODE (body) == PARALLEL) + { + rtx expr = XVECEXP (body, 0, 0); + if (GET_CODE (expr) == SET + && v2df_reduction_p (SET_SRC (expr))) + return 1; + } + + /* An UNSPEC_VPERM is ok if the mask operand is loaded from the + constant pool, and optionally complemented afterwards. */ + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == UNSPEC + && XINT (SET_SRC (body), 1) == UNSPEC_VPERM + && XVECLEN (SET_SRC (body), 0) == 3 + && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG) + { + rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2); + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + FOR_EACH_INSN_INFO_USE (use, insn_info) + if (rtx_equal_p (DF_REF_REG (use), mask_reg)) + { + struct df_link *def_link = DF_REF_CHAIN (use); + /* Punt if multiple definitions for this reg. 
*/ + if (def_link && !def_link->next && + const_load_sequence_p (insn_entry, + DF_REF_INSN (def_link->ref))) + { + *special = SH_VPERM; + return 1; + } + else if (def_link && !def_link->next && + load_comp_mask_p (insn_entry, + DF_REF_INSN (def_link->ref))) + { + *special = SH_VPERM_COMP; + return 1; + } + } + } + /* Otherwise check the operands for vector lane violations. */ return rtx_is_swappable_p (body, special); } @@ -34882,6 +35123,235 @@ adjust_splat (rtx_insn *insn) fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn)); } +/* Given OP that contains an XXPERMDI operation (that is not a doubleword + swap), reverse the order of the source operands and adjust the indices + of the source lanes to account for doubleword reversal. */ +static void +adjust_xxpermdi (rtx_insn *insn) +{ + rtx set = PATTERN (insn); + rtx select = XEXP (set, 1); + rtx concat = XEXP (select, 0); + rtx src0 = XEXP (concat, 0); + XEXP (concat, 0) = XEXP (concat, 1); + XEXP (concat, 1) = src0; + rtx parallel = XEXP (select, 1); + int lane0 = INTVAL (XVECEXP (parallel, 0, 0)); + int lane1 = INTVAL (XVECEXP (parallel, 0, 1)); + int new_lane0 = 3 - lane1; + int new_lane1 = 3 - lane0; + XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0); + XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1); + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn)); +} + +/* Given OP that contains a VEC_CONCAT operation of two doublewords, + reverse the order of those inputs. */ +static void +adjust_concat (rtx_insn *insn) +{ + rtx set = PATTERN (insn); + rtx concat = XEXP (set, 1); + rtx src0 = XEXP (concat, 0); + XEXP (concat, 0) = XEXP (concat, 1); + XEXP (concat, 1) = src0; + INSN_CODE (insn) = -1; /* Force re-recognition. 
*/ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn)); +} + +/* We previously determined that a use of MASK_REG in INSN was fed by a + swap of a swapping load of a TOC-relative constant pool symbol. Return + the CONST_VECTOR that was loaded, as well as the LOAD_INSN (by + reference). */ +static rtx +find_swapped_load_and_const_vector (rtx_insn *insn, rtx_insn **load_insn, + rtx mask_reg) +{ + /* Find the swap. */ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + rtx_insn *swap_insn = 0; + FOR_EACH_INSN_INFO_USE (use, insn_info) + if (rtx_equal_p (DF_REF_REG (use), mask_reg)) + { + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + swap_insn = DF_REF_INSN (def_link->ref); + break; + } + gcc_assert (swap_insn); + + /* Find the load. */ + insn_info = DF_INSN_INFO_GET (swap_insn); + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + *load_insn = DF_REF_INSN (def_link->ref); + break; + } + gcc_assert (*load_insn); + + /* Find the TOC-relative symbol access. */ + insn_info = DF_INSN_INFO_GET (*load_insn); + rtx_insn *tocrel_insn = 0; + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + tocrel_insn = DF_REF_INSN (def_link->ref); + break; + } + gcc_assert (tocrel_insn); + + /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p + to set tocrel_base; otherwise it would be unnecessary as we've + already established it will return true. */ + rtx base, offset; + rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn)); + /* There is an extra level of indirection for small/large code models. 
*/ + if (GET_CODE (tocrel_expr) == MEM) + tocrel_expr = XEXP (tocrel_expr, 0); + if (!toc_relative_expr_p (tocrel_expr, false)) + gcc_unreachable (); + split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset); + rtx const_vector = get_pool_constant (base); + /* With the extra indirection, get_pool_constant will produce the + real constant from the reg_equal expression, so get the real + constant. */ + if (GET_CODE (const_vector) == SYMBOL_REF) + const_vector = get_pool_constant (const_vector); + gcc_assert (GET_CODE (const_vector) == CONST_VECTOR); + + return const_vector; +} + +/* Create a new CONST_VECTOR from NEW_MASK, and replace the MEM in + LOAD_INSN with a MEM referencing that CONST_VECTOR. */ +static void +replace_const_vector_in_load (rtx_insn *load_insn, unsigned int *new_mask) +{ + unsigned int i; + rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); + for (i = 0; i < 16; ++i) + XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]); + rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0)); + rtx new_mem = force_const_mem (V16QImode, new_const_vector); + /* This gives us a MEM whose base operand is a SYMBOL_REF, which we + can't recognize. Force the SYMBOL_REF into a register. */ + if (!REG_P (XEXP (new_mem, 0))) { + rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0)); + XEXP (new_mem, 0) = base_reg; + /* Move the newly created insn ahead of the load insn. */ + rtx_insn *force_insn = get_last_insn (); + remove_insn (force_insn); + rtx_insn *before_load_insn = PREV_INSN (load_insn); + add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn)); + df_insn_rescan (before_load_insn); + df_insn_rescan (force_insn); + } + + XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem; + INSN_CODE (load_insn) = -1; /* Force re-recognition. */ + df_insn_rescan (load_insn); +} + +/* Given an UNSPEC_VPERM insn, modify the mask loaded from the + constant pool to reflect swapped doublewords. 
*/ +static void +adjust_vperm (rtx_insn *insn) +{ + /* We previously determined that the UNSPEC_VPERM was fed by a + swap of a swapping load of a TOC-relative constant pool symbol. + Find the MEM in the swapping load and replace it with a MEM for + the adjusted mask constant. */ + rtx set = PATTERN (insn); + rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2); + rtx_insn *load_insn = 0; + rtx const_vector = find_swapped_load_and_const_vector (insn, &load_insn, + mask_reg); + + /* Create an adjusted mask from the initial mask. */ + unsigned int new_mask[16], i, val; + for (i = 0; i < 16; ++i) { + val = INTVAL (XVECEXP (const_vector, 0, i)); + if (val < 16) + new_mask[i] = (val + 8) % 16; + else + new_mask[i] = ((val + 8) % 16) + 16; + } + + /* Update the load instruction to load the new constant vector. */ + replace_const_vector_in_load (load_insn, new_mask); + + if (dump_file) + fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn)); +} + +/* Given an UNSPEC_VPERM insn fed by a complement operation, modify + the mask loaded from the constant pool to reflect swapped doublewords + and the complement. */ +static void +adjust_vperm_comp (rtx_insn *insn, swap_web_entry *insn_entry) +{ + /* We previously determined that the UNSPEC_VPERM was fed by a + VNOR, itself fed by a swap of a swapping load of a TOC-relative + constant pool symbol. Find the MEM in the swapping load and + replace it with a MEM for the adjusted mask constant. */ + rtx set = PATTERN (insn); + rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2); + + /* Find the VNOR and mark it for removal. 
*/ + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + rtx_insn *vnor_insn = 0; + FOR_EACH_INSN_INFO_USE (use, insn_info) + if (rtx_equal_p (DF_REF_REG (use), mask_reg)) + { + struct df_link *def_link = DF_REF_CHAIN (use); + gcc_assert (def_link && !def_link->next); + vnor_insn = DF_REF_INSN (def_link->ref); + break; + } + gcc_assert (vnor_insn); + + unsigned uid = INSN_UID (vnor_insn); + insn_entry[uid].will_delete = 1; + + /* Identify the original mask register from the VNOR. */ + set = PATTERN (vnor_insn); + mask_reg = XEXP (XEXP (SET_SRC (set), 0), 0); + + /* Find the load insn and the CONST_VECTOR that it loads. */ + rtx_insn *load_insn = 0; + rtx const_vector + = find_swapped_load_and_const_vector (vnor_insn, &load_insn, mask_reg); + + /* Create an adjusted mask from the initial mask, which reflects + both the effect of the swap and of the complement. */ + unsigned int new_mask[16], i, val; + for (i = 0; i < 16; ++i) { + val = 31 - INTVAL (XVECEXP (const_vector, 0, i)); + if (val < 16) + new_mask[i] = (val + 8) % 16; + else + new_mask[i] = ((val + 8) % 16) + 16; + } + + /* Update the load instruction to load the new constant vector. */ + replace_const_vector_in_load (load_insn, new_mask); + + if (dump_file) + fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn)); +} + /* The insn described by INSN_ENTRY[I] can be swapped, but only with special handling. Take care of that here. */ static void @@ -34928,17 +35398,38 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i) /* Change the lane on a direct-splat operation. */ adjust_splat (insn); break; + case SH_XXPERMDI: + /* Change the lanes on an XXPERMDI operation. */ + adjust_xxpermdi (insn); + break; + case SH_CONCAT: + /* Reverse the order of a concatenation operation. */ + adjust_concat (insn); + break; + case SH_VPERM: + /* Change the mask loaded from the constant pool for a VPERM. 
*/ + adjust_vperm (insn); + break; + case SH_VPERM_COMP: + /* Change the mask loaded from the constant pool and + complemented for a vec_perm built-in. */ + adjust_vperm_comp (insn, insn_entry); } } /* Find the insn from the Ith table entry, which is known to be a - register swap Y = SWAP(X). Replace it with a copy Y = X. */ + register swap Y = SWAP(X). Replace it with a copy Y = X. + There is now one exception to this. The table entry may also + refer to Y = VNOR(X, X). */ static void replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) { rtx_insn *insn = insn_entry[i].insn; rtx body = PATTERN (insn); - rtx src_reg = XEXP (SET_SRC (body), 0); + enum rtx_code code = GET_CODE (SET_SRC (body)); + rtx src_reg = (code == IOR + ? XEXP (XEXP (SET_SRC (body), 0), 0) + : XEXP (SET_SRC (body), 0)); rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg); rtx_insn *new_insn = emit_insn_before (copy, insn); set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn)); @@ -34947,7 +35438,10 @@ replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) if (dump_file) { unsigned int new_uid = INSN_UID (new_insn); - fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid); + if (code == IOR) + fprintf (dump_file, "Replacing vnor %d with copy %d\n", i, new_uid); + else + fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid); } df_insn_delete (insn); @@ -35000,6 +35494,14 @@ dump_swap_insn_table (swap_web_entry *insn_entry) fputs ("special:extract ", dump_file); else if (insn_entry[i].special_handling == SH_SPLAT) fputs ("special:splat ", dump_file); + else if (insn_entry[i].special_handling == SH_XXPERMDI) + fputs ("special:xxpermdi ", dump_file); + else if (insn_entry[i].special_handling == SH_CONCAT) + fputs ("special:concat ", dump_file); + else if (insn_entry[i].special_handling == SH_VPERM) + fputs ("special:vperm ", dump_file); + else if (insn_entry[i].special_handling == SH_VPERM_COMP) + fputs ("special:vperm_c ", dump_file); } if 
(insn_entry[i].web_not_optimizable) fputs ("unoptimizable ", dump_file); diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index b306bb7fd1c..61b2d163321 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15321,20 +15321,21 @@ void vec_vsx_st (vector bool char, int, vector bool char *); void vec_vsx_st (vector bool char, int, unsigned char *); void vec_vsx_st (vector bool char, int, signed char *); -vector double vec_xxpermdi (vector double, vector double, int); -vector float vec_xxpermdi (vector float, vector float, int); -vector long long vec_xxpermdi (vector long long, vector long long, int); +vector double vec_xxpermdi (vector double, vector double, const int); +vector float vec_xxpermdi (vector float, vector float, const int); +vector long long vec_xxpermdi (vector long long, vector long long, const int); vector unsigned long long vec_xxpermdi (vector unsigned long long, - vector unsigned long long, int); -vector int vec_xxpermdi (vector int, vector int, int); + vector unsigned long long, const int); +vector int vec_xxpermdi (vector int, vector int, const int); vector unsigned int vec_xxpermdi (vector unsigned int, - vector unsigned int, int); -vector short vec_xxpermdi (vector short, vector short, int); + vector unsigned int, const int); +vector short vec_xxpermdi (vector short, vector short, const int); vector unsigned short vec_xxpermdi (vector unsigned short, - vector unsigned short, int); -vector signed char vec_xxpermdi (vector signed char, vector signed char, int); + vector unsigned short, const int); +vector signed char vec_xxpermdi (vector signed char, vector signed char, + const int); vector unsigned char vec_xxpermdi (vector unsigned char, - vector unsigned char, int); + vector unsigned char, const int); vector double vec_xxsldi (vector double, vector double, int); vector float vec_xxsldi (vector float, vector float, int); diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 9dc753b567c..b3f4403ba9c 100644 --- 
a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,28 @@ +2017-05-01 Janus Weil <janus@gcc.gnu.org> + + Backport from trunk + PR fortran/80392 + * trans-types.c (gfc_get_derived_type): Prevent an infinite loop when + building a derived type that includes a procedure pointer component + with a polymorphic result. + +2017-04-21 Janus Weil <janus@gcc.gnu.org> + + Backport from trunk + PR fortran/80361 + * class.c (generate_finalization_wrapper): Give the finalization wrapper + the recursive attribute. + +2017-04-14 Dominique d'Humieres <dominiq@lps.ens.fr> + + Backport from trunk + 2015-11-18 Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/59910 + PR fortran/80388 + * primary.c (gfc_match_structure_constructor): Reduce a structure + constructor in a DATA statement. + 2017-02-07 Steven G. Kargl <kargl@gcc.gnu.org> * trans-types.c (gfc_get_int_kind_from_width_isofortranen): Choose diff --git a/gcc/fortran/class.c b/gcc/fortran/class.c index 4ab96524b24..c8f67d53820 100644 --- a/gcc/fortran/class.c +++ b/gcc/fortran/class.c @@ -1599,6 +1599,7 @@ generate_finalization_wrapper (gfc_symbol *derived, gfc_namespace *ns, final->attr.flavor = FL_PROCEDURE; final->attr.function = 1; final->attr.pure = 0; + final->attr.recursive = 1; final->result = final; final->ts.type = BT_INTEGER; final->ts.kind = 4; diff --git a/gcc/fortran/primary.c b/gcc/fortran/primary.c index 44b9901cf67..d2fe8bc0eac 100644 --- a/gcc/fortran/primary.c +++ b/gcc/fortran/primary.c @@ -2657,6 +2657,12 @@ gfc_match_structure_constructor (gfc_symbol *sym, gfc_expr **result) return MATCH_ERROR; } + /* If a structure constructor is in a DATA statement, then each entity + in the structure constructor must be a constant. Try to reduce the + expression here. 
*/ + if (gfc_in_match_data ()) + gfc_reduce_init_expr (e); + *result = e; return MATCH_YES; } diff --git a/gcc/fortran/trans-types.c b/gcc/fortran/trans-types.c index 5d59ef52f78..153914e753f 100644 --- a/gcc/fortran/trans-types.c +++ b/gcc/fortran/trans-types.c @@ -2554,9 +2554,10 @@ gfc_get_derived_type (gfc_symbol * derived) the same as derived, by forcing the procedure pointer component to be built as if the explicit interface does not exist. */ if (c->attr.proc_pointer - && ((c->ts.type != BT_DERIVED && c->ts.type != BT_CLASS) - || (c->ts.u.derived - && !gfc_compare_derived_types (derived, c->ts.u.derived)))) + && (c->ts.type != BT_DERIVED || (c->ts.u.derived + && !gfc_compare_derived_types (derived, c->ts.u.derived))) + && (c->ts.type != BT_CLASS || (CLASS_DATA (c)->ts.u.derived + && !gfc_compare_derived_types (derived, CLASS_DATA (c)->ts.u.derived)))) field_type = gfc_get_ppc_type (c); else if (c->attr.proc_pointer && derived->backend_decl) { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 14932534994..554990f0ddb 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,76 @@ +2016-05-08 Bill Schmidt <wschmidt@linux.vnet.ibm.com> + + Backport from mainline + PR target/69868 + swap optimization backports + * gcc.target/powerpc/swaps-p8-20.c: New. + * gcc.target/powerpc/swaps-p8-23.c: New. + * gcc.target/powerpc/swaps-p8-24.c: New. + +2017-05-01 Janus Weil <janus@gcc.gnu.org> + + Backport from trunk + PR fortran/80392 + * gfortran.dg/proc_ptr_comp_49.f90: New test case. + +2017-04-21 Janus Weil <janus@gcc.gnu.org> + + Backport from trunk + PR fortran/80361 + * gfortran.dg/class_62.f90: New test case. + +2017-04-21 Christophe Lyon <christophe.lyon@linaro.org> + + Backport from mainline + 2015-11-23 Kugan Vivekanandarajah <kuganv@linaro.org> + + PR target/68390 + * gcc.c-torture/execute/pr68390.c: New test. + +2017-04-14 Dominique d'Humieres <dominiq@lps.ens.fr> + + Backport from trunk + 2015-11-18 Steven G. 
Kargl <kargl@gcc.gnu.org> + + PR fortran/59910 + PR fortran/80388 + * gfortran.dg/pr59910.f90: New test. + +2017-04-11 Thomas Preud'homme <thomas.preudhomme@arm.com> + + Backport from GCC 6 + 2017-04-06 Thomas Preud'homme <thomas.preudhomme@arm.com> + + PR target/80082 + * gcc.target/arm/atomic_loaddi_10.c: New testcase. + * gcc.target/arm/atomic_loaddi_11.c: Likewise. + +2017-04-11 Martin Jambor <mjambor@suse.cz> + + Backport from mainline + 2017-03-30 Martin Jambor <mjambor@suse.cz> + + PR ipa/77333 + * g++.dg/ipa/pr77333.C: New test. + +2017-04-11 Bin Cheng <bin.cheng@arm.com> + + PR tree-optimization/80345 + * gcc.c-torture/compile/pr80345.c: New test. + + Backport from mainline + 2016-02-10 Bin Cheng <bin.cheng@arm.com> + + PR tree-optimization/68021 + * gcc.dg/tree-ssa/pr68021.c: New test. + +2017-04-07 Uros Bizjak <ubizjak@gmail.com> + + Backport from mainline + 2017-04-06 Uros Bizjak <ubizjak@gmail.com> + + PR target/79733 + * gcc.target/i386/pr79733.c: New test. + 2017-04-03 Peter Bergner <bergner@vnet.ibm.com> Backport from mainline diff --git a/gcc/testsuite/g++.dg/ipa/pr77333.C b/gcc/testsuite/g++.dg/ipa/pr77333.C new file mode 100644 index 00000000000..1ef997f7a54 --- /dev/null +++ b/gcc/testsuite/g++.dg/ipa/pr77333.C @@ -0,0 +1,65 @@ +// { dg-do run } +// { dg-options "-O2 -fno-ipa-sra" } + +volatile int global; +int __attribute__((noinline, noclone)) +get_data (int i) +{ + global = i; + return i; +} + +typedef int array[32]; + +namespace { + +char buf[512]; + +class A +{ +public: + int field; + char *s; + + A() : field(223344) + { + s = buf; + } + + int __attribute__((noinline)) + foo (int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, + int k, int l, int m, int n, int o, int p, int q, int r, int s, int t) + { + global = a+b+c+d+e+f+g+h+i+j+k+l+m+n+o+p+q+r+s+t; + return global; + } + + int __attribute__((noinline)) + bar() + { + int r = foo (get_data (1), get_data (1), get_data (1), get_data (1), + get_data (1), get_data (1), get_data (1), 
get_data (1), + get_data (1), get_data (1), get_data (1), get_data (1), + get_data (1), get_data (1), get_data (1), get_data (1), + get_data (1), get_data (1), get_data (1), get_data (1)); + + if (field != 223344) + __builtin_abort (); + return 0; + } +}; + +} + +int main (int argc, char **argv) +{ + A a; + int r = a.bar(); + r = a.bar (); + if (a.field != 223344) + __builtin_abort (); + if (global != 20) + __builtin_abort (); + + return r; +} diff --git a/gcc/testsuite/gcc.c-torture/compile/pr80345.c b/gcc/testsuite/gcc.c-torture/compile/pr80345.c new file mode 100644 index 00000000000..9762f7c8877 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr80345.c @@ -0,0 +1,17 @@ +/* PR tree-optimization/80345 */ + +typedef long mp_limb_signed_t; +void fn1(mp_limb_signed_t p1) { + int *a = (int *)1; + mp_limb_signed_t i, j; + i = 0; + for (; i < p1; i++) { + j = 0; + for (; j <= i; j++) + *a++ = 0; + j = i + 1; + for (; j < p1; j++) + a++; + } +} +void fn2() { fn1((mp_limb_signed_t)fn2); } diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr68021.c b/gcc/testsuite/gcc.dg/tree-ssa/pr68021.c new file mode 100644 index 00000000000..f60b1ff1ac3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr68021.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +char a; +void fn1 (char *p1, int p2, int p3) +{ + int i, x; + for (i = 0; i < 10; i++) + { + for (x = 0; x < p3; x++) + { + *p1 = a; + p1--; + } + p1 += p2; + } +} diff --git a/gcc/testsuite/gcc.target/arm/atomic_loaddi_10.c b/gcc/testsuite/gcc.target/arm/atomic_loaddi_10.c new file mode 100644 index 00000000000..ecc3d06d0c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/atomic_loaddi_10.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v7ve_ok } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v7ve } */ + +#include <stdatomic.h> + +atomic_llong x = 0; + +atomic_llong get_x() +{ + return atomic_load(&x); +} + +/* { dg-final { scan-assembler "ldrd" } } */ 
diff --git a/gcc/testsuite/gcc.target/arm/atomic_loaddi_11.c b/gcc/testsuite/gcc.target/arm/atomic_loaddi_11.c new file mode 100644 index 00000000000..85c64ae68b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/atomic_loaddi_11.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v7r_ok } */ +/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" "-march=*" } { "-mcpu=cortex-r5" } } */ +/* { dg-options "-O2 -mcpu=cortex-r5" } */ + +#include <stdatomic.h> + +atomic_llong x = 0; + +atomic_llong get_x() +{ + return atomic_load(&x); +} + +/* { dg-final { scan-assembler-not "ldrd" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr79733.c b/gcc/testsuite/gcc.target/i386/pr79733.c new file mode 100644 index 00000000000..5caec911b01 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr79733.c @@ -0,0 +1,23 @@ +/* PR target/79733 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ + +typedef unsigned short __mmask16; + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_kortestc (__mmask16 __A, __mmask16 __B) +{ + return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A, + (__mmask16) __B); +} + +void +avx512f_test () +{ + volatile __mmask16 k1 = 0; + __mmask16 k2 = 0; + volatile short r; + + r = _mm512_kortestc (k1, k2); +} diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-20.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-20.c new file mode 100644 index 00000000000..7463781281e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-20.c @@ -0,0 +1,29 @@ +/* { dg-do run { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-O2 -mcpu=power8 -maltivec" } */ + +/* The expansion for vector character multiply introduces a vperm operation. 
+ This tests that the swap optimization to remove swaps by changing the + vperm mask results in correct code. */ + +#include <altivec.h> + +void abort (); + +vector unsigned char r; +vector unsigned char v = + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; +vector unsigned char i = + { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; +vector unsigned char e = + {0, 2, 6, 12, 20, 30, 42, 56, 72, 90, 110, 132, 156, 182, 210, 240}; + +int main () +{ + int j; + r = v * i; + if (!vec_all_eq (r, e)) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-23.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-23.c new file mode 100644 index 00000000000..a3f83ae26b9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-23.c @@ -0,0 +1,26 @@ +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O3 -ffast-math" } */ +/* { dg-final { scan-assembler "lxvd2x" } } */ +/* { dg-final { scan-assembler-not "xxpermdi" } } */ + +/* Verify that swap optimization works correctly in the presence of + a V2DFmode reduction. 
*/ + +extern double optvalue; +extern void obfuscate (double, unsigned int); + +void +foo (double *x, double *y, unsigned int n, unsigned int m) +{ + unsigned int i, j; + double sacc; + for (j = 0; j < m; ++j) + { + sacc = 0.0; + for (i = 0; i < n; ++i) + sacc += x[i] * y[i]; + obfuscate (sacc, n); + } + optvalue = n * 2.0 * m; +} diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-24.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-24.c new file mode 100644 index 00000000000..528d6e6a68c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-24.c @@ -0,0 +1,26 @@ +/* { dg-do compile { target { powerpc64le-*-* } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O3 -ffast-math" } */ +/* { dg-final { scan-assembler "lxvd2x" } } */ +/* { dg-final { scan-assembler-not "xxpermdi" } } */ + +/* Verify that swap optimization works correctly in the presence of + a V4SFmode reduction. */ + +extern double optvalue; +extern void obfuscate (float, unsigned int); + +void +foo (float *x, float *y, unsigned int n, unsigned int m) +{ + unsigned int i, j; + float sacc; + for (j = 0; j < m; ++j) + { + sacc = 0.0f; + for (i = 0; i < n; ++i) + sacc += x[i] * y[i]; + obfuscate (sacc, n); + } + optvalue = n * 2.0f * m; +} diff --git a/gcc/testsuite/gfortran.dg/class_62.f90 b/gcc/testsuite/gfortran.dg/class_62.f90 new file mode 100644 index 00000000000..39ee98dd361 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/class_62.f90 @@ -0,0 +1,29 @@ +! { dg-do run } +! { dg-options "-fcheck=recursion" } +! +! PR 80361: [5/6/7 Regression] bogus recursive call to nonrecursive procedure with -fcheck=recursion +! +! 
Contributed by Jürgen Reuter <juergen.reuter@desy.de> + +program main_ut + + implicit none + + type :: prt_spec_expr_t + end type + + type :: prt_expr_t + class(prt_spec_expr_t), allocatable :: x + end type + + type, extends (prt_spec_expr_t) :: prt_spec_list_t + type(prt_expr_t) :: e + end type + + class(prt_spec_list_t), allocatable :: y + + allocate (y) + allocate (prt_spec_list_t :: y%e%x) + deallocate(y) + +end program diff --git a/gcc/testsuite/gfortran.dg/pr59910.f90 b/gcc/testsuite/gfortran.dg/pr59910.f90 new file mode 100644 index 00000000000..2b288e4ff6c --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr59910.f90 @@ -0,0 +1,11 @@ +! { dg-do compile } +! PR fortran/59910 +! +program main + implicit none + type bar + integer :: limit(1) + end type + type (bar) :: testsuite + data testsuite / bar(reshape(source=[10],shape=[1])) / +end diff --git a/gcc/testsuite/gfortran.dg/proc_ptr_comp_49.f90 b/gcc/testsuite/gfortran.dg/proc_ptr_comp_49.f90 new file mode 100644 index 00000000000..e89791f728c --- /dev/null +++ b/gcc/testsuite/gfortran.dg/proc_ptr_comp_49.f90 @@ -0,0 +1,21 @@ +! { dg-do compile } +! +! PR 80392: [5/6/7 Regression] [OOP] ICE with allocatable polymorphic function result in a procedure pointer component +! +! Contributed by <zed.three@gmail.com> + +module mwe + + implicit none + + type :: MyType + procedure(my_op), nopass, pointer :: op + end type + +contains + + function my_op() result(foo) + class(MyType), allocatable :: foo + end function + +end module diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index d5f33646343..29cca40bc0a 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -3400,7 +3400,18 @@ get_computation_aff (struct loop *loop, var = fold_convert (uutype, var); } - if (!constant_multiple_of (ustep, cstep, &rat)) + /* Ratio is 1 when computing the value of biv cand by itself. 
+ We can't rely on constant_multiple_of in this case because the + use is created after the original biv is selected. The call + could fail because of inconsistent fold behavior. See PR68021 + for more information. */ + if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) + { + gcc_assert (is_gimple_assign (use->stmt)); + gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after); + rat = 1; + } + else if (!constant_multiple_of (ustep, cstep, &rat)) return false; /* In case both UBASE and CBASE are shortened to UUTYPE from some common diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 49c7bd8a2fa..4c7b8c2654d 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,8 @@ +2017-04-07 Alan Modra <amodra@gmail.com> + + PR target/45053 + * config/rs6000/t-crtstuff (CRTSTUFF_T_CFLAGS): Add -O2. + 2017-01-09 Andreas Tobler <andreast@gcc.gnu.org> Backport from mainline diff --git a/libgcc/config/rs6000/t-crtstuff b/libgcc/config/rs6000/t-crtstuff index 7422d383754..0b2601b05bd 100644 --- a/libgcc/config/rs6000/t-crtstuff +++ b/libgcc/config/rs6000/t-crtstuff @@ -1,3 +1,6 @@ # If .sdata is enabled __CTOR_{LIST,END}__ go into .sdata instead of # .ctors. -CRTSTUFF_T_CFLAGS = -msdata=none +# Do not build crtend.o with -Os as that can result in references to +# out-of-line register save/restore functions, which may be unresolved +# as crtend.o is linked after libgcc.a. See PR45053. +CRTSTUFF_T_CFLAGS = -msdata=none -O2 |