diff options
Diffstat (limited to 'gcc/config')
62 files changed, 1898 insertions, 1194 deletions
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 53dcd03590d..010fd3ccf76 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -108,7 +108,7 @@ AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, 0, false, "sm3 sm4 /* Enabling "fp16fml" also enables "fp" and "fp16". Disabling "fp16fml" just disables "fp16fml". */ -AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfml") +AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfhm") /* Enabling "sve" also enables "fp16", "fp" and "simd". Disabling "sve" just disables "sve". */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 5a1894063a1..cc5a887d404 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1059,8 +1059,8 @@ (match_operand:GPI 1 "general_operand" ""))] "" " - if (MEM_P (operands[0]) && CONST_INT_P (operands[1]) - && <MODE>mode == DImode + if (MEM_P (operands[0]) && !MEM_VOLATILE_P (operands[0]) + && CONST_INT_P (operands[1]) && <MODE>mode == DImode && aarch64_split_dimode_const_store (operands[0], operands[1])) DONE; @@ -6304,7 +6304,7 @@ [(match_operand:GPI 0 "register_operand") (match_operand:GPF 1 "register_operand")] "TARGET_FLOAT - && ((GET_MODE_SIZE (<GPF:MODE>mode) <= GET_MODE_SIZE (<GPI:MODE>mode)) + && ((GET_MODE_BITSIZE (<GPF:MODE>mode) <= LONG_TYPE_SIZE) || !flag_trapping_math || flag_fp_int_builtin_inexact)" { rtx cvt = gen_reg_rtx (<GPF:MODE>mode); diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index 7719c3b6352..3c6d1cc90ad 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -119,15 +119,15 @@ EnumValue Enum(aarch64_tls_size) String(48) Value(48) march= -Target RejectNegative ToLower Joined Var(aarch64_arch_string) +Target RejectNegative Negative(march=) ToLower Joined Var(aarch64_arch_string) Use features of architecture ARCH. mcpu= -Target RejectNegative ToLower Joined Var(aarch64_cpu_string) +Target RejectNegative Negative(mcpu=) ToLower Joined Var(aarch64_cpu_string) Use features of and optimize for CPU. mtune= -Target RejectNegative ToLower Joined Var(aarch64_tune_string) +Target RejectNegative Negative(mtune=) ToLower Joined Var(aarch64_tune_string) Optimize for CPU. mabi= diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index 984540f0b53..524379d3763 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -8839,6 +8839,9 @@ alpha_handle_trap_shadows (void) case CODE_LABEL: goto close_shadow; + case DEBUG_INSN: + break; + default: gcc_unreachable (); } diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index ee1f70bbb51..e6e4fb18ce1 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -5992,6 +5992,22 @@ arc_legitimize_pic_address (rtx addr) switch (GET_CODE (addr)) { + case UNSPEC: + /* Can be one or our GOT or GOTOFFPC unspecs. This situation + happens when an address is not a legitimate constant and we + need the resolve it via force_reg in + prepare_move_operands. */ + switch (XINT (addr, 1)) + { + case ARC_UNSPEC_GOT: + case ARC_UNSPEC_GOTOFFPC: + /* Recover the symbol ref. */ + addr = XVECEXP (addr, 0, 0); + break; + default: + return addr; + } + /* Fall through. */ case SYMBOL_REF: /* TLS symbols are handled in different place. */ if (SYMBOL_REF_TLS_MODEL (addr)) diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 78e304d4c71..722cccd8be3 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -3742,20 +3742,20 @@ core_3, archs4x, archs4xd, archs4xd_slow" ; cond_exec patterns (define_insn "*movsi_ne" [(cond_exec - (ne (match_operand:CC_Z 2 "cc_use_register" "Rcc, Rcc, Rcc,Rcc,Rcc") (const_int 0)) - (set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,Rcq#q,Rcq#q, w,w") - (match_operand:SI 1 "nonmemory_operand" "C_0, h, ?Cal, Lc,?Cal")))] + (ne (match_operand:CC_Z 2 "cc_use_register" "Rcc,Rcc,Rcc,Rcc,Rcc") (const_int 0)) + (set (match_operand:SI 0 "dest_reg_operand" "=q, q, r, q, r") + (match_operand:SI 1 "nonmemory_operand" "C_0, h, Lr,Cal,Cal")))] "" "@ - * current_insn_predicate = 0; return \"sub%?.ne %0,%0,%0%&\"; - * current_insn_predicate = 0; return \"mov%?.ne %0,%1\"; - * current_insn_predicate = 0; return \"mov%?.ne %0,%1\"; - mov.ne %0,%1 - mov.ne %0,%1" + * current_insn_predicate = 0; return \"sub%?.ne\\t%0,%0,%0\"; + * current_insn_predicate = 0; return \"mov%?.ne\\t%0,%1\"; + mov.ne\\t%0,%1 + * current_insn_predicate = 0; return \"mov%?.ne\\t%0,%1\"; + mov.ne\\t%0,%1" [(set_attr "type" "cmove") - (set_attr "iscompact" "true,true,true_limm,false,false") - (set_attr "length" "2,2,6,4,8") - (set_attr "cpu_facility" "*,av2,av2,*,*")]) + (set_attr "iscompact" "true,true,false,true_limm,false") + (set_attr "length" "2,2,4,6,8") + (set_attr "cpu_facility" "*,av2,*,av2,*")]) (define_insn "*movsi_cond_exec" [(cond_exec diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 45abcd89963..91bb65130b8 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -19670,6 +19670,35 @@ arm_compute_save_core_reg_mask (void) return save_reg_mask; } +/* Return a mask for the call-clobbered low registers that are unused + at the end of the prologue. */ +static unsigned long +thumb1_prologue_unused_call_clobbered_lo_regs (void) +{ + unsigned long mask = 0; + + for (int reg = 0; reg <= LAST_LO_REGNUM; reg++) + if (!callee_saved_reg_p (reg) + && !REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), + reg)) + mask |= 1 << reg; + return mask; +} + +/* Similarly for the start of the epilogue. */ +static unsigned long +thumb1_epilogue_unused_call_clobbered_lo_regs (void) +{ + unsigned long mask = 0; + + for (int reg = 0; reg <= LAST_LO_REGNUM; reg++) + if (!callee_saved_reg_p (reg) + && !REGNO_REG_SET_P (df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun)), + reg)) + mask |= 1 << reg; + return mask; +} + /* Compute a bit mask of which core registers need to be saved on the stack for the current function. */ static unsigned long @@ -19701,10 +19730,19 @@ thumb1_compute_save_core_reg_mask (void) if (mask & 0xff || thumb_force_lr_save ()) mask |= (1 << LR_REGNUM); - /* Make sure we have a low work register if we need one. - We will need one if we are going to push a high register, - but we are not currently intending to push a low register. */ + bool call_clobbered_scratch + = (thumb1_prologue_unused_call_clobbered_lo_regs () + && thumb1_epilogue_unused_call_clobbered_lo_regs ()); + + /* Make sure we have a low work register if we need one. We will + need one if we are going to push a high register, but we are not + currently intending to push a low register. However if both the + prologue and epilogue have a spare call-clobbered low register, + then we won't need to find an additional work register. It does + not need to be the same register in the prologue and + epilogue. */ if ((mask & 0xff) == 0 + && !call_clobbered_scratch && ((mask & 0x0f00) || TARGET_BACKTRACE)) { /* Use thumb_find_work_register to choose which register @@ -24930,12 +24968,7 @@ thumb1_unexpanded_epilogue (void) unsigned long mask = live_regs_mask & 0xff; int next_hi_reg; - /* The available low registers depend on the size of the value we are - returning. */ - if (size <= 12) - mask |= 1 << 3; - if (size <= 8) - mask |= 1 << 2; + mask |= thumb1_epilogue_unused_call_clobbered_lo_regs (); if (mask == 0) /* Oh dear! We have no low registers into which we can pop @@ -24943,7 +24976,7 @@ thumb1_unexpanded_epilogue (void) internal_error ("no low registers available for popping high registers"); - for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++) + for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--) if (live_regs_mask & (1 << next_hi_reg)) break; @@ -24951,7 +24984,7 @@ thumb1_unexpanded_epilogue (void) { /* Find lo register(s) into which the high register(s) can be popped. */ - for (regno = 0; regno <= LAST_LO_REGNUM; regno++) + for (regno = LAST_LO_REGNUM; regno >= 0; regno--) { if (mask & (1 << regno)) high_regs_pushed--; @@ -24959,20 +24992,22 @@ thumb1_unexpanded_epilogue (void) break; } - mask &= (2 << regno) - 1; /* A noop if regno == 8 */ + if (high_regs_pushed == 0 && regno >= 0) + mask &= ~((1 << regno) - 1); /* Pop the values into the low register(s). */ thumb_pop (asm_out_file, mask); /* Move the value(s) into the high registers. */ - for (regno = 0; regno <= LAST_LO_REGNUM; regno++) + for (regno = LAST_LO_REGNUM; regno >= 0; regno--) { if (mask & (1 << regno)) { asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg, regno); - for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++) + for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM; + next_hi_reg--) if (live_regs_mask & (1 << next_hi_reg)) break; } @@ -25354,10 +25389,20 @@ thumb1_expand_prologue (void) break; /* Here we need to mask out registers used for passing arguments - even if they can be pushed. This is to avoid using them to stash the high - registers. Such kind of stash may clobber the use of arguments. */ + even if they can be pushed. This is to avoid using them to + stash the high registers. Such kind of stash may clobber the + use of arguments. */ pushable_regs = l_mask & (~arg_regs_mask); - if (lr_needs_saving) + pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs (); + + /* Normally, LR can be used as a scratch register once it has been + saved; but if the function examines its own return address then + the value is still live and we need to avoid using it. */ + bool return_addr_live + = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), + LR_REGNUM); + + if (lr_needs_saving || return_addr_live) pushable_regs &= ~(1 << LR_REGNUM); if (pushable_regs == 0) @@ -25398,6 +25443,11 @@ thumb1_expand_prologue (void) push_mask |= 1 << LR_REGNUM; real_regs_mask |= 1 << LR_REGNUM; lr_needs_saving = false; + /* If the return address is not live at this point, we + can add LR to the list of registers that we can use + for pushes. */ + if (!return_addr_live) + pushable_regs |= 1 << LR_REGNUM; } insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask); diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 0aecd03891c..ae582172ab9 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -4483,62 +4483,78 @@ ; ARMv6+ unaligned load/store instructions (used for packed structure accesses). (define_insn "unaligned_loadsi" - [(set (match_operand:SI 0 "s_register_operand" "=l,r") - (unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")] + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m,Uw,m")] UNSPEC_UNALIGNED_LOAD))] "unaligned_access" - "ldr%?\t%0, %1\t@ unaligned" - [(set_attr "arch" "t2,any") - (set_attr "length" "2,4") - (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no") + "@ + ldr\t%0, %1\t@ unaligned + ldr%?\t%0, %1\t@ unaligned + ldr%?\t%0, %1\t@ unaligned" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "predicable" "no,yes,yes") + (set_attr "predicable_short_it" "no,yes,no") (set_attr "type" "load_4")]) +;; The 16-bit Thumb1 variant of ldrsh requires two registers in the +;; address (there's no immediate format). That's tricky to support +;; here and we don't really need this pattern for that case, so only +;; enable for 32-bit ISAs. (define_insn "unaligned_loadhis" [(set (match_operand:SI 0 "s_register_operand" "=r") (sign_extend:SI (unspec:HI [(match_operand:HI 1 "memory_operand" "Uh")] UNSPEC_UNALIGNED_LOAD)))] - "unaligned_access" + "unaligned_access && TARGET_32BIT" "ldrsh%?\t%0, %1\t@ unaligned" [(set_attr "predicable" "yes") (set_attr "type" "load_byte")]) (define_insn "unaligned_loadhiu" - [(set (match_operand:SI 0 "s_register_operand" "=l,r") + [(set (match_operand:SI 0 "s_register_operand" "=l,l,r") (zero_extend:SI - (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")] + (unspec:HI [(match_operand:HI 1 "memory_operand" "m,Uw,m")] UNSPEC_UNALIGNED_LOAD)))] "unaligned_access" - "ldrh%?\t%0, %1\t@ unaligned" - [(set_attr "arch" "t2,any") - (set_attr "length" "2,4") - (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no") + "@ + ldrh\t%0, %1\t@ unaligned + ldrh%?\t%0, %1\t@ unaligned + ldrh%?\t%0, %1\t@ unaligned" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "predicable" "no,yes,yes") + (set_attr "predicable_short_it" "no,yes,no") (set_attr "type" "load_byte")]) (define_insn "unaligned_storesi" - [(set (match_operand:SI 0 "memory_operand" "=Uw,m") - (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")] + [(set (match_operand:SI 0 "memory_operand" "=m,Uw,m") + (unspec:SI [(match_operand:SI 1 "s_register_operand" "l,l,r")] UNSPEC_UNALIGNED_STORE))] "unaligned_access" - "str%?\t%1, %0\t@ unaligned" - [(set_attr "arch" "t2,any") - (set_attr "length" "2,4") - (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no") + "@ + str\t%1, %0\t@ unaligned + str%?\t%1, %0\t@ unaligned + str%?\t%1, %0\t@ unaligned" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "predicable" "no,yes,yes") + (set_attr "predicable_short_it" "no,yes,no") (set_attr "type" "store_4")]) (define_insn "unaligned_storehi" - [(set (match_operand:HI 0 "memory_operand" "=Uw,m") - (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")] + [(set (match_operand:HI 0 "memory_operand" "=m,Uw,m") + (unspec:HI [(match_operand:HI 1 "s_register_operand" "l,l,r")] UNSPEC_UNALIGNED_STORE))] "unaligned_access" - "strh%?\t%1, %0\t@ unaligned" - [(set_attr "arch" "t2,any") - (set_attr "length" "2,4") - (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "yes,no") + "@ + strh\t%1, %0\t@ unaligned + strh%?\t%1, %0\t@ unaligned + strh%?\t%1, %0\t@ unaligned" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "predicable" "no,yes,yes") + (set_attr "predicable_short_it" "no,yes,no") (set_attr "type" "store_4")]) diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 9067d491b9c..78f3878e037 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -82,7 +82,7 @@ mapcs-stack-check Target Report Mask(APCS_STACK) Undocumented march= -Target RejectNegative ToLower Joined Var(arm_arch_string) +Target RejectNegative Negative(march=) ToLower Joined Var(arm_arch_string) Specify the name of the target architecture. ; Other arm_arch values are loaded from arm-tables.opt @@ -107,7 +107,7 @@ Target Report Mask(CALLER_INTERWORKING) Thumb: Assume function pointers may go to non-Thumb aware code. mcpu= -Target RejectNegative ToLower Joined Var(arm_cpu_string) +Target RejectNegative Negative(mcpu=) ToLower Joined Var(arm_cpu_string) Specify the name of the target CPU. mfloat-abi= @@ -232,7 +232,7 @@ Target Report Mask(TPCS_LEAF_FRAME) Thumb: Generate (leaf) stack frames even if not needed. mtune= -Target RejectNegative ToLower Joined Var(arm_tune_string) +Target RejectNegative Negative(mtune=) ToLower Joined Var(arm_tune_string) Tune code for the given processor. mprint-tune-info diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h index 2c7acc698ea..6857ab1787d 100644 --- a/gcc/config/arm/arm_acle.h +++ b/gcc/config/arm/arm_acle.h @@ -174,8 +174,12 @@ __arm_mrrc2 (const unsigned int __coproc, const unsigned int __opc1, #endif /* (!__thumb__ || __thumb2__) && __ARM_ARCH >= 4. */ #pragma GCC push_options -#if __ARM_ARCH >= 8 +#ifdef __ARM_FEATURE_CRC32 +#ifdef __ARM_FP +#pragma GCC target ("arch=armv8-a+crc+simd") +#else #pragma GCC target ("arch=armv8-a+crc") +#endif __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) __crc32b (uint32_t __a, uint8_t __b) @@ -235,7 +239,7 @@ __crc32cd (uint32_t __a, uint64_t __b) } #endif -#endif /* __ARM_ARCH >= 8. */ +#endif /* __ARM_FEATURE_CRC32 */ #pragma GCC pop_options #ifdef __cplusplus diff --git a/gcc/config/arm/t-multilib b/gcc/config/arm/t-multilib index 08526302283..dc97c8f09fb 100644 --- a/gcc/config/arm/t-multilib +++ b/gcc/config/arm/t-multilib @@ -24,6 +24,8 @@ # values during the configure step. We enforce this during the # top-level configury. +s-mlib: $(srcdir)/config/arm/t-multilib $(srcdir)/config/arm/t-aprofile $(srcdir)/config/arm/t-rmprofile + MULTILIB_OPTIONS = MULTILIB_DIRNAMES = MULTILIB_EXCEPTIONS = @@ -63,6 +65,8 @@ all_early_arch := armv5tej armv6 armv6j armv6k armv6z armv6kz \ v7_a_arch_variants := $(call all_feat_combs, mp sec) v7_a_nosimd_variants := +fp +vfpv3 +vfpv3-d16-fp16 +vfpv3-fp16 +vfpv4-d16 +vfpv4 v7_a_simd_variants := +simd +neon-fp16 +neon-vfpv4 +v7_r_sp_variants := +fp.sp +fp.sp+idiv +vfpv3xd-fp16 +vfpv3xd-fp16+idiv +v7_r_dp_variants := +fp +fp+idiv +vfpv3-d16-fp16 +vfpv3-d16-fp16+idiv v7ve_nosimd_variants := +vfpv3-d16 +vfpv3 +vfpv3-d16-fp16 +vfpv3-fp16 +fp +vfpv4 v7ve_vfpv3_simd_variants := +neon +neon-fp16 v7ve_vfpv4_simd_variants := +simd @@ -86,8 +90,8 @@ SEP := $(and $(HAS_APROFILE),$(HAS_RMPROFILE),/) MULTILIB_OPTIONS += marm/mthumb MULTILIB_DIRNAMES += arm thumb -MULTILIB_OPTIONS += march=armv5te+fp/march=armv7/march=armv7+fp/$(MULTI_ARCH_OPTS_A)$(SEP)$(MULTI_ARCH_OPTS_RM) -MULTILIB_DIRNAMES += v5te v7 v7+fp $(MULTI_ARCH_DIRS_A) $(MULTI_ARCH_DIRS_RM) +MULTILIB_OPTIONS += march=armv5te+fp/march=armv7/march=armv7+fp/march=armv7-r+fp.sp/$(MULTI_ARCH_OPTS_A)$(SEP)$(MULTI_ARCH_OPTS_RM) +MULTILIB_DIRNAMES += v5te v7 v7+fp v7-r+fp.sp $(MULTI_ARCH_DIRS_A) $(MULTI_ARCH_DIRS_RM) MULTILIB_OPTIONS += mfloat-abi=soft/mfloat-abi=softfp/mfloat-abi=hard MULTILIB_DIRNAMES += nofp softfp hard @@ -100,22 +104,31 @@ MULTILIB_REQUIRED += mthumb/march=armv7/mfloat-abi=soft MULTILIB_REQUIRED += mthumb/march=armv7+fp/mfloat-abi=softfp MULTILIB_REQUIRED += mthumb/march=armv7+fp/mfloat-abi=hard -# Map v7-r down onto common v7 code. +MULTILIB_REQUIRED += mthumb/march=armv7-r+fp.sp/mfloat-abi=softfp +MULTILIB_REQUIRED += mthumb/march=armv7-r+fp.sp/mfloat-abi=hard + +# Map v7-r with double precision down onto common v7 code. MULTILIB_MATCHES += march?armv7=march?armv7-r MULTILIB_MATCHES += march?armv7=march?armv7-r+idiv -MULTILIB_MATCHES += march?armv7+fp=march?armv7-r+fp -MULTILIB_MATCHES += march?armv7+fp=march?armv7-r+fp+idiv +MULTILIB_MATCHES += $(foreach ARCH, $(v7_r_dp_variants), \ + march?armv7+fp=march?armv7-r$(ARCH)) + +# Map v7-r single precision variants to v7-r with single precision. +MULTILIB_MATCHES += $(foreach ARCH, \ + $(filter-out +fp.sp, $(v7_r_sp_variants)), \ + march?armv7-r+fp.sp=march?armv7-r$(ARCH)) MULTILIB_MATCHES += $(foreach ARCH, $(all_early_arch), \ march?armv5te+fp=march?$(ARCH)+fp) -# Map v8-r down onto common v7 code. +# Map v8-r down onto common v7 code or v7-r sp. MULTILIB_MATCHES += march?armv7=march?armv8-r MULTILIB_MATCHES += $(foreach ARCH, $(v8_r_nosimd_variants), \ march?armv7=march?armv8-r$(ARCH)) MULTILIB_MATCHES += $(foreach ARCH,+simd +crypto, \ march?armv7+fp=march?armv8-r$(ARCH) \ march?armv7+fp=march?armv8-r+crc$(ARCH)) - +MULTILIB_MATCHES += march?armv7-r+fp.sp=march?armv8-r+fp.sp +MULTILIB_MATCHES += march?armv7-r+fp.sp=march?armv8-r+crc+fp.sp ifeq (,$(HAS_APROFILE)) # Map all v7-a @@ -177,7 +190,7 @@ MULTILIB_MATCHES += $(foreach ARCH, $(v8_5_a_simd_variants), \ MULTILIB_REUSE += mthumb/march.armv7/mfloat-abi.soft=marm/march.armv7/mfloat-abi.soft MULTILIB_REUSE += $(foreach ABI, hard softfp, \ - $(foreach ARCH, armv7+fp, \ + $(foreach ARCH, armv7+fp armv7-r+fp\.sp, \ mthumb/march.$(ARCH)/mfloat-abi.$(ABI)=marm/march.$(ARCH)/mfloat-abi.$(ABI))) # Softfp but no FP, use the soft-float libraries. diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c index a9f72b314c2..cb4b14ae379 100644 --- a/gcc/config/avr/avr.c +++ b/gcc/config/avr/avr.c @@ -3797,13 +3797,14 @@ avr_out_lpm (rtx_insn *insn, rtx *op, int *plen) gcc_unreachable(); case 1: - return avr_asm_len ("%4lpm %0,%a2", xop, plen, 1); + avr_asm_len ("%4lpm %0,%a2", xop, plen, 1); + break; case 2: if (REGNO (dest) == REG_Z) - return avr_asm_len ("%4lpm %5,%a2+" CR_TAB - "%4lpm %B0,%a2" CR_TAB - "mov %A0,%5", xop, plen, 3); + avr_asm_len ("%4lpm %5,%a2+" CR_TAB + "%4lpm %B0,%a2" CR_TAB + "mov %A0,%5", xop, plen, 3); else { avr_asm_len ("%4lpm %A0,%a2+" CR_TAB @@ -3832,9 +3833,9 @@ avr_out_lpm (rtx_insn *insn, rtx *op, int *plen) "%4lpm %B0,%a2+", xop, plen, 2); if (REGNO (dest) == REG_Z - 2) - return avr_asm_len ("%4lpm %5,%a2+" CR_TAB - "%4lpm %C0,%a2" CR_TAB - "mov %D0,%5", xop, plen, 3); + avr_asm_len ("%4lpm %5,%a2+" CR_TAB + "%4lpm %C0,%a2" CR_TAB + "mov %D0,%5", xop, plen, 3); else { avr_asm_len ("%4lpm %C0,%a2+" CR_TAB diff --git a/gcc/config/darwin-driver.c b/gcc/config/darwin-driver.c index 3d85f29cb3d..b3577c416bc 100644 --- a/gcc/config/darwin-driver.c +++ b/gcc/config/darwin-driver.c @@ -210,6 +210,28 @@ darwin_default_min_version (void) return new_flag; } +/* See if we can find the sysroot from the SDKROOT environment variable. */ + +static const char * +maybe_get_sysroot_from_sdkroot () +{ + const char *maybe_sysroot = getenv ("SDKROOT"); + + /* We'll use the same rules as the clang driver, for compatibility. + 1) The path must be absolute + 2) Ignore "/", that is the default anyway and we do not want the + sysroot semantics to be applied to it. + 3) It must exist (actually, we'll check it's readable too). */ + + if (maybe_sysroot == NULL + || *maybe_sysroot != '/' + || strlen (maybe_sysroot) == 1 + || access (maybe_sysroot, R_OK) == -1) + return NULL; + + return xstrndup (maybe_sysroot, strlen (maybe_sysroot)); +} + /* Translate -filelist and -framework options in *DECODED_OPTIONS (size *DECODED_OPTIONS_COUNT) to use -Xlinker so that they are considered to be linker inputs in the case that no other inputs are @@ -234,6 +256,7 @@ darwin_driver_init (unsigned int *decoded_options_count, bool appendM64 = false; const char *vers_string = NULL; bool seen_version_min = false; + bool seen_sysroot_p = false; for (i = 1; i < *decoded_options_count; i++) { @@ -314,6 +337,11 @@ darwin_driver_init (unsigned int *decoded_options_count, --*decoded_options_count; break; + case OPT__sysroot_: + case OPT_isysroot: + seen_sysroot_p = true; + break; + default: break; } @@ -375,6 +403,22 @@ darwin_driver_init (unsigned int *decoded_options_count, &(*decoded_options)[*decoded_options_count - 1]); } + if (! seen_sysroot_p) + { + /* We will pick up an SDKROOT if we didn't specify a sysroot and treat + it as overriding any configure-time --with-sysroot. */ + const char *sdkroot = maybe_get_sysroot_from_sdkroot (); + if (sdkroot) + { + ++*decoded_options_count; + *decoded_options = XRESIZEVEC (struct cl_decoded_option, + *decoded_options, + *decoded_options_count); + generate_option (OPT__sysroot_, sdkroot, 1, CL_DRIVER, + &(*decoded_options)[*decoded_options_count - 1]); + } + } + /* We will need to know the OS X version we're trying to build for here so that we can figure out the mechanism and source for the sysroot to be used. */ diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h index e5614b627d7..afeca81f807 100644 --- a/gcc/config/darwin-protos.h +++ b/gcc/config/darwin-protos.h @@ -53,8 +53,6 @@ extern void darwin_set_default_type_attributes (tree); #endif /* TREE_CODE */ -extern void machopic_finish (FILE *); - extern int machopic_reloc_rw_mask (void); extern section *machopic_select_section (tree, int, unsigned HOST_WIDE_INT); diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c index b9862353843..a7610829f75 100644 --- a/gcc/config/darwin.c +++ b/gcc/config/darwin.c @@ -75,15 +75,9 @@ along with GCC; see the file COPYING3. If not see setting the second word in the .non_lazy_symbol_pointer data structure to symbol. See indirect_data for the code that handles the extra indirection, and machopic_output_indirection and its use - of MACHO_SYMBOL_STATIC for the code that handles @code{static} + of MACHO_SYMBOL_FLAG_STATIC for the code that handles @code{static} symbol indirection. */ -/* For darwin >= 9 (OSX 10.5) the linker is capable of making the necessary - branch islands and we no longer need to emit darwin stubs. - However, if we are generating code for earlier systems (or for use in the - kernel) the stubs might still be required, and this will be set true. */ -int darwin_emit_branch_islands = false; - typedef struct GTY(()) cdtor_record { rtx symbol; int priority; /* [con/de]structor priority */ @@ -105,6 +99,10 @@ int generating_for_darwin_version ; for weak or single-definition items. */ static bool ld_uses_coal_sects = false; +/* Very old (ld_classic) linkers need a symbol to mark the start of + each FDE. */ +static bool ld_needs_eh_markers = false; + /* Section names. */ section * darwin_sections[NUM_DARWIN_SECTIONS]; @@ -250,7 +248,7 @@ name_needs_quotes (const char *name) int machopic_symbol_defined_p (rtx sym_ref) { - if (SYMBOL_REF_FLAGS (sym_ref) & MACHO_SYMBOL_FLAG_DEFINED) + if (MACHO_SYMBOL_DEFINED_P (sym_ref)) return true; /* If a symbol references local and is not an extern to this @@ -259,7 +257,7 @@ machopic_symbol_defined_p (rtx sym_ref) { /* If the symbol references a variable and the variable is a common symbol, then this symbol is not defined. */ - if (SYMBOL_REF_FLAGS (sym_ref) & MACHO_SYMBOL_FLAG_VARIABLE) + if (MACHO_SYMBOL_VARIABLE_P (sym_ref)) { tree decl = SYMBOL_REF_DECL (sym_ref); if (!decl) @@ -455,6 +453,13 @@ typedef struct GTY ((for_user)) machopic_indirection bool stub_p; /* True iff this stub or pointer has been referenced. */ bool used; + /* True iff a non-lazy symbol pointer should be emitted into the .data + section, rather than the non-lazy symbol pointers section. The cases + for which this occurred seem to have been unintentional, and later + toolchains emit all of the indirections to the 'usual' section. We + are keeping this in case it is necessary to preserve compatibility with + older toolchains. */ + bool nlsp_in_data_section; } machopic_indirection; struct indirection_hasher : ggc_ptr_hash<machopic_indirection> @@ -489,7 +494,7 @@ indirection_hasher::equal (machopic_indirection *s, const char *k) /* Return the name of the non-lazy pointer (if STUB_P is false) or stub (if STUB_B is true) corresponding to the given name. - If we have a situation like: + PR71767 - If we have a situation like: global_weak_symbol: .... @@ -498,36 +503,22 @@ Lnon_weak_local: ld64 will be unable to split this into two atoms (because the "L" makes the second symbol 'invisible'). This means that legitimate direct accesses - to the second symbol will appear to be non-allowed direct accesses to an - atom of type weak, global which are not allowed. - - To avoid this, we make the indirections have a leading 'l' (lower-case L) - which has a special meaning: linker can see this and use it to determine - atoms, but it is not placed into the final symbol table. + to the second symbol will appear to be direct accesses to an atom of type + weak, global which are not allowed. - The implementation here is somewhat heavy-handed in that it will also mark - indirections to the __IMPORT,__pointers section the same way which is - really unnecessary, since ld64 _can_ split those into atoms as they are - fixed size. FIXME: determine if this is a penalty worth extra code to - fix. + To avoid this, we make any data-section indirections have a leading 'l' + (lower-case L) which has a special meaning: linker can see this and use + it to determine atoms, but it is not placed into the final symbol table. + Symbols in the non-lazy symbol pointers section (or stubs) do not have this + problem because ld64 already knows the size of each entry. */ const char * machopic_indirection_name (rtx sym_ref, bool stub_p) { - char *buffer; const char *name = XSTR (sym_ref, 0); - size_t namelen = strlen (name); - machopic_indirection *p; - bool needs_quotes; - const char *suffix; - char L_or_l = 'L'; - const char *prefix = user_label_prefix; - const char *quote = ""; - tree id; - - id = maybe_get_identifier (name); + tree id = maybe_get_identifier (name); if (id) { tree id_orig = id; @@ -535,43 +526,47 @@ machopic_indirection_name (rtx sym_ref, bool stub_p) while (IDENTIFIER_TRANSPARENT_ALIAS (id)) id = TREE_CHAIN (id); if (id != id_orig) - { - name = IDENTIFIER_POINTER (id); - namelen = strlen (name); - } + name = IDENTIFIER_POINTER (id); } + const char *prefix = user_label_prefix; + /* If we are emitting the label 'verbatim' then omit the U_L_P and count + the name without the leading '*'. */ if (name[0] == '*') { prefix = ""; ++name; - --namelen; - } - - needs_quotes = name_needs_quotes (name); - if (needs_quotes) - { - quote = "\""; } - if (stub_p) - suffix = STUB_SUFFIX; - else - { - suffix = NON_LAZY_POINTER_SUFFIX; - /* Let the linker see this. */ - L_or_l = 'l'; - } - - buffer = XALLOCAVEC (char, 2 /* strlen ("&L") or ("&l") */ - + strlen (prefix) - + namelen - + strlen (suffix) - + 2 * strlen (quote) - + 1 /* '\0' */); + /* Here we are undoing a number of causes that placed some indirections + (apparently erroneously) into the .data section. Specifically, some + symbols that are ABI mandated indirections and some hidden symbols + were being placed there - which cause difficulties with later + versions of ld64. Iff (after these checks) some symbol still gets an + indirection in the data section, we want to adjust the indirection + name to be linker visible to deal with PR71767 (notes above). */ + bool nlsp_in_data_section = + ! MACHO_SYMBOL_MUST_INDIRECT_P (sym_ref) + && ! MACHO_SYMBOL_HIDDEN_VIS_P (sym_ref) + && (machopic_symbol_defined_p (sym_ref) || SYMBOL_REF_LOCAL_P (sym_ref)) + && ! indirect_data (sym_ref); + + const char *suffix = stub_p ? STUB_SUFFIX : NON_LAZY_POINTER_SUFFIX; + /* If the indirection is in the data section, let the linker see it. */ + char L_or_l = (!stub_p && nlsp_in_data_section) ? 'l' : 'L'; + /* We have mangled symbols with spaces and punctuation which typically + need surrounding in quotes for the assembler to consume them. */ + const char *quote = name_needs_quotes (name) ? "\"" : ""; + char *buffer = XALLOCAVEC (char, 2 /* strlen ("&L") or ("&l") */ + + strlen (prefix) + + strlen (name) + + strlen (suffix) + + 2 * strlen (quote) + + 1 /* '\0' */); /* Construct the name of the non-lazy pointer or stub. */ - sprintf (buffer, "&%s%c%s%s%s%s", quote, L_or_l, prefix, name, suffix, quote); + sprintf (buffer, "&%s%c%s%s%s%s", quote, L_or_l, prefix, name, + suffix, quote); if (!machopic_indirections) machopic_indirections = hash_table<indirection_hasher>::create_ggc (37); @@ -580,10 +575,9 @@ machopic_indirection_name (rtx sym_ref, bool stub_p) = machopic_indirections->find_slot_with_hash (buffer, htab_hash_string (buffer), INSERT); + machopic_indirection *p; if (*slot) - { - p = *slot; - } + p = *slot; else { p = ggc_alloc<machopic_indirection> (); @@ -591,6 +585,7 @@ machopic_indirection_name (rtx sym_ref, bool stub_p) p->ptr_name = xstrdup (buffer); p->stub_p = stub_p; p->used = false; + p->nlsp_in_data_section = nlsp_in_data_section; *slot = p; } @@ -666,7 +661,7 @@ machopic_indirect_data_reference (rtx orig, rtx reg) /* some other cpu -- writeme! */ gcc_unreachable (); } - else if (defined) + else if (defined && ! MACHO_SYMBOL_MUST_INDIRECT_P (orig)) { rtx offset = NULL; if (DARWIN_PPC || HAVE_lo_sum) @@ -708,6 +703,7 @@ machopic_indirect_data_reference (rtx orig, rtx reg) machopic_indirection_name (orig, /*stub_p=*/false))); SYMBOL_REF_DATA (ptr_ref) = SYMBOL_REF_DATA (orig); + SYMBOL_REF_FLAGS (ptr_ref) |= MACHO_SYMBOL_FLAG_INDIRECTION; ptr_ref = gen_const_mem (Pmode, ptr_ref); machopic_define_symbol (ptr_ref); @@ -790,7 +786,7 @@ machopic_indirect_data_reference (rtx orig, rtx reg) rtx machopic_indirect_call_target (rtx target) { - if (! darwin_emit_branch_islands) + if (! darwin_symbol_stubs) return target; if (GET_CODE (target) != MEM) @@ -798,8 +794,7 @@ machopic_indirect_call_target (rtx target) if (MACHOPIC_INDIRECT && GET_CODE (XEXP (target, 0)) == SYMBOL_REF - && !(SYMBOL_REF_FLAGS (XEXP (target, 0)) - & MACHO_SYMBOL_FLAG_DEFINED)) + && ! MACHO_SYMBOL_DEFINED_P (XEXP (target, 0))) { rtx sym_ref = XEXP (target, 0); const char *stub_name = machopic_indirection_name (sym_ref, @@ -808,6 +803,7 @@ machopic_indirect_call_target (rtx target) XEXP (target, 0) = gen_rtx_SYMBOL_REF (mode, stub_name); SYMBOL_REF_DATA (XEXP (target, 0)) = SYMBOL_REF_DATA (sym_ref); + SYMBOL_REF_FLAGS (XEXP (target, 0)) |= MACHO_SYMBOL_FLAG_INDIRECTION; MEM_READONLY_P (target) = 1; MEM_NOTRAP_P (target) = 1; } @@ -844,7 +840,7 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) { if (reg == 0) { - gcc_assert (!reload_in_progress); + gcc_assert (!lra_in_progress); reg = gen_reg_rtx (Pmode); } @@ -928,7 +924,7 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) emit_use (gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM)); #endif - if (reload_in_progress) + if (lra_in_progress) df_set_regs_ever_live (REGNO (pic), true); pic_ref = gen_rtx_PLUS (Pmode, pic, machopic_gen_offset (XEXP (orig, 0))); @@ -952,7 +948,7 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) if (reg == 0) { - gcc_assert (!reload_in_progress); + gcc_assert (!lra_in_progress); reg = gen_reg_rtx (Pmode); } @@ -998,7 +994,7 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) #if 0 emit_use (pic_offset_table_rtx); #endif - if (reload_in_progress) + if (lra_in_progress) df_set_regs_ever_live (REGNO (pic), true); pic_ref = gen_rtx_PLUS (Pmode, pic, @@ -1069,129 +1065,160 @@ machopic_legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) return pic_ref; } -/* Output the stub or non-lazy pointer in *SLOT, if it has been used. - DATA is the FILE* for assembly output. Called from - htab_traverse. */ +/* Callbacks to output the stub or non-lazy pointers. + Each works on the item in *SLOT,if it has been used. + DATA is the FILE* for assembly output. + Called from htab_traverses, invoked from machopic_finish(). */ int -machopic_output_indirection (machopic_indirection **slot, FILE *asm_out_file) +machopic_output_data_section_indirection (machopic_indirection **slot, + FILE *asm_out_file) { machopic_indirection *p = *slot; - rtx symbol; - const char *sym_name; - const char *ptr_name; - if (!p->used) + if (!p->used || !p->nlsp_in_data_section) return 1; - symbol = p->symbol; - sym_name = XSTR (symbol, 0); - ptr_name = p->ptr_name; + rtx symbol = p->symbol; + /* The original symbol name. */ + const char *sym_name = XSTR (symbol, 0); + /* The name of the indirection symbol. */ + const char *ptr_name = p->ptr_name; - if (p->stub_p) - { - char *sym; - char *stub; - tree id; + switch_to_section (data_section); + assemble_align (GET_MODE_ALIGNMENT (Pmode)); + assemble_label (asm_out_file, ptr_name); + assemble_integer (gen_rtx_SYMBOL_REF (Pmode, sym_name), + GET_MODE_SIZE (Pmode), + GET_MODE_ALIGNMENT (Pmode), 1); - id = maybe_get_identifier (sym_name); - if (id) - { - tree id_orig = id; + return 1; +} - while (IDENTIFIER_TRANSPARENT_ALIAS (id)) - id = TREE_CHAIN (id); - if (id != id_orig) - sym_name = IDENTIFIER_POINTER (id); - } +int +machopic_output_stub_indirection (machopic_indirection **slot, + FILE *asm_out_file) +{ + machopic_indirection *p = *slot; - sym = XALLOCAVEC (char, strlen (sym_name) + 2); - if (sym_name[0] == '*' || sym_name[0] == '&') - strcpy (sym, sym_name + 1); - else if (sym_name[0] == '-' || sym_name[0] == '+') - strcpy (sym, sym_name); - else - sprintf (sym, "%s%s", user_label_prefix, sym_name); + if (!p->used || !p->stub_p) + return 1; - stub = XALLOCAVEC (char, strlen (ptr_name) + 2); - if (ptr_name[0] == '*' || ptr_name[0] == '&') - strcpy (stub, ptr_name + 1); - else - sprintf (stub, "%s%s", user_label_prefix, ptr_name); + rtx symbol = p->symbol; + /* The original symbol name. */ + const char *sym_name = XSTR (symbol, 0); + /* The name of the stub symbol. */ + const char *ptr_name = p->ptr_name; - machopic_output_stub (asm_out_file, sym, stub); - } - else if (! indirect_data (symbol) - && (machopic_symbol_defined_p (symbol) - || SYMBOL_REF_LOCAL_P (symbol))) + tree id = maybe_get_identifier (sym_name); + if (id) { - switch_to_section (data_section); - assemble_align (GET_MODE_ALIGNMENT (Pmode)); - assemble_label (asm_out_file, ptr_name); - assemble_integer (gen_rtx_SYMBOL_REF (Pmode, sym_name), - GET_MODE_SIZE (Pmode), - GET_MODE_ALIGNMENT (Pmode), 1); + tree id_orig = id; + + while (IDENTIFIER_TRANSPARENT_ALIAS (id)) + id = TREE_CHAIN (id); + if (id != id_orig) + sym_name = IDENTIFIER_POINTER (id); } + + char *sym = XALLOCAVEC (char, strlen (sym_name) + 2); + if (sym_name[0] == '*' || sym_name[0] == '&') + strcpy (sym, sym_name + 1); + else if (sym_name[0] == '-' || sym_name[0] == '+') + strcpy (sym, sym_name); else - { - rtx init = const0_rtx; + sprintf (sym, "%s%s", user_label_prefix, sym_name); + + char *stub = XALLOCAVEC (char, strlen (ptr_name) + 2); + if (ptr_name[0] == '*' || ptr_name[0] == '&') + strcpy (stub, ptr_name + 1); + else + sprintf (stub, "%s%s", user_label_prefix, ptr_name); - switch_to_section (darwin_sections[machopic_nl_symbol_ptr_section]); + machopic_output_stub (asm_out_file, sym, stub); - /* Mach-O symbols are passed around in code through indirect - references and the original symbol_ref hasn't passed through - the generic handling and reference-catching in - output_operand, so we need to manually mark weak references - as such. */ - if (SYMBOL_REF_WEAK (symbol)) + return 1; +} + +int +machopic_output_indirection (machopic_indirection **slot, FILE *asm_out_file) +{ + machopic_indirection *p = *slot; + + if (!p->used || p->stub_p || p->nlsp_in_data_section) + return 1; + + rtx symbol = p->symbol; + /* The original symbol name. */ + const char *sym_name = XSTR (symbol, 0); + /* The nonlazy-stub symbol name. */ + const char *ptr_name = p->ptr_name; + + switch_to_section (darwin_sections[machopic_nl_symbol_ptr_section]); + + /* Mach-O symbols are passed around in code through indirect references and + the original symbol_ref hasn't passed through the generic handling and + reference-catching in output_operand, so we need to manually mark weak + references as such. */ + + if (SYMBOL_REF_WEAK (symbol)) + { + tree decl = SYMBOL_REF_DECL (symbol); + gcc_checking_assert (DECL_P (decl)); + + if (decl != NULL_TREE + && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl) + /* Handle only actual external-only definitions, not + e.g. extern inline code or variables for which + storage has been allocated. */ + && !TREE_STATIC (decl)) { - tree decl = SYMBOL_REF_DECL (symbol); - gcc_assert (DECL_P (decl)); - - if (decl != NULL_TREE - && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl) - /* Handle only actual external-only definitions, not - e.g. extern inline code or variables for which - storage has been allocated. */ - && !TREE_STATIC (decl)) - { - fputs ("\t.weak_reference ", asm_out_file); - assemble_name (asm_out_file, sym_name); - fputc ('\n', asm_out_file); - } + fputs ("\t.weak_reference ", asm_out_file); + assemble_name (asm_out_file, sym_name); + fputc ('\n', asm_out_file); } + } - assemble_name (asm_out_file, ptr_name); - fprintf (asm_out_file, ":\n"); + assemble_name (asm_out_file, ptr_name); + fprintf (asm_out_file, ":\n"); - fprintf (asm_out_file, "\t.indirect_symbol "); - assemble_name (asm_out_file, sym_name); - fprintf (asm_out_file, "\n"); + fprintf (asm_out_file, "\t.indirect_symbol "); + assemble_name (asm_out_file, sym_name); + fprintf (asm_out_file, "\n"); - /* Variables that are marked with MACHO_SYMBOL_STATIC need to - have their symbol name instead of 0 in the second entry of - the non-lazy symbol pointer data structure when they are - defined. This allows the runtime to rebind newer instances - of the translation unit with the original instance of the - symbol. */ + /* Variables that are marked with MACHO_SYMBOL_FLAG_STATIC need to + have their symbol name instead of 0 in the second entry of + the non-lazy symbol pointer data structure when they are + defined. This allows the runtime to rebind newer instances + of the translation unit with the original instance of the + symbol. */ - if ((SYMBOL_REF_FLAGS (symbol) & MACHO_SYMBOL_STATIC) - && machopic_symbol_defined_p (symbol)) - init = gen_rtx_SYMBOL_REF (Pmode, sym_name); + rtx init = const0_rtx; + if (MACHO_SYMBOL_STATIC_P (symbol) && machopic_symbol_defined_p (symbol)) + init = gen_rtx_SYMBOL_REF (Pmode, sym_name); - assemble_integer (init, GET_MODE_SIZE (Pmode), - GET_MODE_ALIGNMENT (Pmode), 1); - } + assemble_integer (init, GET_MODE_SIZE (Pmode), + GET_MODE_ALIGNMENT (Pmode), 1); return 1; } -void +static void machopic_finish (FILE *asm_out_file) { - if (machopic_indirections) - machopic_indirections - ->traverse_noresize<FILE *, machopic_output_indirection> (asm_out_file); + if (!machopic_indirections) + return; + + /* First output an symbol indirections that have been placed into .data + (we don't expect these now). */ + machopic_indirections->traverse_noresize + <FILE *, machopic_output_data_section_indirection> (asm_out_file); + + machopic_indirections->traverse_noresize + <FILE *, machopic_output_stub_indirection> (asm_out_file); + + machopic_indirections->traverse_noresize + <FILE *, machopic_output_indirection> (asm_out_file); } int @@ -1206,25 +1233,51 @@ machopic_operand_p (rtx op) && XINT (XEXP (op, 0), 1) == UNSPEC_MACHOPIC_OFFSET); } -/* This function records whether a given name corresponds to a defined - or undefined function or variable, for machopic_classify_ident to - use later. */ +/* This function: + computes and caches a series of flags that characterise the symbol's + properties that affect Mach-O code gen (including accidental cases + from older toolchains). + + TODO: + Here we also need to do enough analysis to determine if a symbol's + name needs to be made linker-visible. This is more tricky - since + it depends on whether we've previously seen a global weak definition + in the same section. + */ void -darwin_encode_section_info (tree decl, rtx rtl, int first ATTRIBUTE_UNUSED) +darwin_encode_section_info (tree decl, rtx rtl, int first) { - rtx sym_ref; + /* Careful not to prod global register variables. */ + if (!MEM_P (rtl)) + return; - /* Do the standard encoding things first. */ + /* Do the standard encoding things first; this sets: + SYMBOL_FLAG_FUNCTION, + SYMBOL_FLAG_LOCAL, (binds_local_p) + TLS_MODEL, SYMBOL_FLAG_SMALL + SYMBOL_FLAG_EXTERNAL. */ default_encode_section_info (decl, rtl, first); - if (TREE_CODE (decl) != FUNCTION_DECL && TREE_CODE (decl) != VAR_DECL) + if (! VAR_OR_FUNCTION_DECL_P (decl)) return; - sym_ref = XEXP (rtl, 0); - if (TREE_CODE (decl) == VAR_DECL) + rtx sym_ref = XEXP (rtl, 0); + if (VAR_P (decl)) SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_VARIABLE; + /* Only really common if there's no initialiser. */ + bool really_common_p = (DECL_COMMON (decl) + && (DECL_INITIAL (decl) == NULL + || (!in_lto_p + && DECL_INITIAL (decl) == error_mark_node))); + + /* For Darwin, if we have specified visibility and it's not the default + that's counted 'hidden'. */ + if (DECL_VISIBILITY_SPECIFIED (decl) + && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_HIDDEN_VIS; + if (!DECL_EXTERNAL (decl) && (!TREE_PUBLIC (decl) || !DECL_WEAK (decl)) && ! lookup_attribute ("weakref", DECL_ATTRIBUTES (decl)) @@ -1235,7 +1288,13 @@ darwin_encode_section_info (tree decl, rtx rtl, int first ATTRIBUTE_UNUSED) SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_DEFINED; if (! TREE_PUBLIC (decl)) - SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_STATIC; + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_STATIC; + + /* Short cut check for Darwin 'must indirect' rules. */ + if (really_common_p + || (DECL_WEAK (decl) && ! MACHO_SYMBOL_HIDDEN_VIS_P (sym_ref)) + || lookup_attribute ("weakref", DECL_ATTRIBUTES (decl))) + SYMBOL_REF_FLAGS (sym_ref) |= MACHO_SYMBOL_FLAG_MUST_INDIRECT; } void @@ -1252,12 +1311,13 @@ darwin_mark_decl_preserved (const char *name) } static section * -darwin_rodata_section (int use_coal, bool zsize) +darwin_rodata_section (int use_coal, bool zsize, int reloc) { return (use_coal ? darwin_sections[const_coal_section] : (zsize ? darwin_sections[zobj_const_section] - : darwin_sections[const_section])); + : reloc ? darwin_sections[const_data_section] + : darwin_sections[const_section])); } static section * @@ -1550,7 +1610,7 @@ machopic_select_section (tree decl, case SECCAT_RODATA: case SECCAT_SRODATA: - base_section = darwin_rodata_section (use_coal, zsize); + base_section = darwin_rodata_section (use_coal, zsize, reloc); break; case SECCAT_RODATA_MERGE_STR: @@ -2086,11 +2146,11 @@ darwin_emit_unwind_label (FILE *file, tree decl, int for_eh, int empty) static int invok_count = 0; static tree last_fun_decl = NULL_TREE; - /* We use the linker to emit the .eh labels for Darwin 9 and above. */ - if (! for_eh || generating_for_darwin_version >= 9) + /* Modern linkers can produce distinct FDEs without compiler support. */ + if (! for_eh || ! ld_needs_eh_markers) return; - /* FIXME: This only works when the eh for all sections of a function is + /* FIXME: This only works when the eh for all sections of a function are emitted at the same time. If that changes, we would need to use a lookup table of some form to determine what to do. Also, we should emit the unadorned label for the partition containing the public label for a @@ -3148,17 +3208,19 @@ darwin_override_options (void) : (generating_for_darwin_version >= 9) ? 1 : 0); - /* Objective-C family ABI 2 is only valid for next/m64 at present. */ if (global_options_set.x_flag_objc_abi && flag_next_runtime) { - if (TARGET_64BIT && global_options.x_flag_objc_abi < 2) - error_at (UNKNOWN_LOCATION, "%<-fobjc-abi-version%> >= 2 must be" - " used for %<-m64%> targets with" - " %<-fnext-runtime%>"); - if (!TARGET_64BIT && global_options.x_flag_objc_abi >= 2) - error_at (UNKNOWN_LOCATION, "%<-fobjc-abi-version%> >= 2 is not" - " supported on %<-m32%> targets with" - " %<-fnext-runtime%>"); + if (TARGET_64BIT && global_options.x_flag_objc_abi != 2) + /* The Objective-C family ABI 2 is the only valid version NeXT/m64. */ + error_at (UNKNOWN_LOCATION, + "%<-fobjc-abi-version%> 2 must be used for 64 bit targets" + " with %<-fnext-runtime%>"); + else if (!TARGET_64BIT && global_options.x_flag_objc_abi >= 2) + /* ABI versions 0 and 1 are the only valid versions NeXT/m32. */ + error_at (UNKNOWN_LOCATION, + "%<-fobjc-abi-version%> %d is not supported for 32 bit" + " targets with %<-fnext-runtime%>", + global_options.x_flag_objc_abi); } /* Don't emit DWARF3/4 unless specifically selected. This is a @@ -3261,11 +3323,44 @@ darwin_override_options (void) flag_pic = 2; } - /* It is assumed that branch island stubs are needed for earlier systems. */ - if (generating_for_darwin_version < 9) - darwin_emit_branch_islands = true; - else - emit_aligned_common = true; /* Later systems can support aligned common. */ + /* Linkers >= ld64-62.1 (at least) are capable of making the necessary PIC + indirections and we no longer need to emit pic symbol stubs. + However, if we are generating code for earlier ones (or for use in the + kernel) the stubs might still be required, and this will be set true. + If the user sets it on or off - then that takes precedence. + + Linkers that don't need stubs, don't need the EH symbol markers either. + */ + + if (!global_options_set.x_darwin_symbol_stubs) + { + if (darwin_target_linker) + { + if (strverscmp (darwin_target_linker, MIN_LD64_OMIT_STUBS) < 0) + { + darwin_symbol_stubs = true; + ld_needs_eh_markers = true; + } + } + else if (generating_for_darwin_version < 9) + { + /* If we don't know the linker version and we're targeting an old + system, we know no better than to assume the use of an earlier + linker. */ + darwin_symbol_stubs = true; + ld_needs_eh_markers = true; + } + } + else if (DARWIN_X86 && darwin_symbol_stubs && TARGET_64BIT) + { + inform (input_location, + "%<-msymbol-stubs%> is not required for 64b code (ignored)"); + darwin_symbol_stubs = false; + } + + if (generating_for_darwin_version >= 9) + /* Later systems can support aligned common. */ + emit_aligned_common = true; /* The c_dialect...() macros are not available to us here. */ darwin_running_cxx = (strstr (lang_hooks.name, "C++") != 0); diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 0e253cb0dfa..be261e73a7b 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -126,6 +126,24 @@ extern GTY(()) int darwin_ms_struct; "%{gsplit-dwarf:%ngsplit-dwarf is not supported on this platform} \ %<gsplit-dwarf" +#if LD64_HAS_EXPORT_DYNAMIC +#define DARWIN_RDYNAMIC "%{rdynamic:-export_dynamic}" +#else +#define DARWIN_RDYNAMIC "%{rdynamic:%nrdynamic is not supported}" +#endif + +/* FIXME: we should check that the linker supports the -pie and -no_pie. + options. */ +#define DARWIN_PIE_SPEC \ +"%{pie|fpie|fPIE:\ + %{mdynamic-no-pic: \ + %n'-mdynamic-no-pic' overrides '-pie', '-fpie' or '-fPIE'; \ + :%:version-compare(>= 10.5 mmacosx-version-min= -pie) }} " + +#define DARWIN_NOPIE_SPEC \ +"%{no-pie|fno-pie|fno-PIE: \ + %:version-compare(>= 10.7 mmacosx-version-min= -no_pie) }" + #define DARWIN_CC1_SPEC \ "%{findirect-virtual-calls: -fapple-kext} %<findirect-virtual-calls " \ "%{fterminated-vtables: -fapple-kext} %<fterminated-vtables " \ @@ -156,20 +174,31 @@ extern GTY(()) int darwin_ms_struct; #define CPP_SPEC "%{static:%{!dynamic:-D__STATIC__}}%{!static:-D__DYNAMIC__}" \ " %{pthread:-D_REENTRANT}" -/* This is mostly a clone of the standard LINK_COMMAND_SPEC, plus - precomp, libtool, and fat build additions. +/* This is a fix for PR41260 by passing -no_compact_unwind on darwin10 and + later until the assembler, linker and libunwind are able to deal with the + output from GCC. + + FIXME: we should check that the linker supports the option. +*/ + +#define DARWIN_NOCOMPACT_UNWIND \ +" %:version-compare(>= 10.6 mmacosx-version-min= -no_compact_unwind) " + +/* In Darwin linker specs we can put -lcrt0.o and ld will search the library + path for crt0.o or -lcrtx.a and it will search for for libcrtx.a. As for + other ports, we can also put xxx.{o,a}%s and get the appropriate complete + startfile absolute directory. This latter point is important when we want + to override ld's rule of .dylib being found ahead of .a and the user wants + the convenience library to be linked. */ + +/* The LINK_COMMAND spec is mostly a clone of the standard LINK_COMMAND_SPEC, + plus precomp, libtool, and fat build additions. In general, random Darwin linker flags should go into LINK_SPEC instead of LINK_COMMAND_SPEC. The command spec is better for specifying the handling of options understood by generic Unix linkers, and for positional arguments like libraries. */ -#if LD64_HAS_EXPORT_DYNAMIC -#define DARWIN_EXPORT_DYNAMIC " %{rdynamic:-export_dynamic}" -#else -#define DARWIN_EXPORT_DYNAMIC " %{rdynamic: %nrdynamic is not supported}" -#endif - #define LINK_COMMAND_SPEC_A \ "%{!fdump=*:%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\ %(linker)" \ @@ -190,10 +219,14 @@ extern GTY(()) int darwin_ms_struct; %{%:sanitize(address): -lasan } \ %{%:sanitize(undefined): -lubsan } \ %(link_ssp) \ - " DARWIN_EXPORT_DYNAMIC " %<rdynamic \ %(link_gcc_c_sequence) \ }}}\ - %{!nostdlib:%{!r:%{!nostartfiles:%E}}} %{T*} %{F*} }}}}}}}" + %{!nostdlib:%{!r:%{!nostartfiles:%E}}} %{T*} %{F*} "\ + DARWIN_PIE_SPEC \ + DARWIN_NOPIE_SPEC \ + DARWIN_RDYNAMIC \ + DARWIN_NOCOMPACT_UNWIND \ + "}}}}}}} %<pie %<no-pie %<rdynamic " #define DSYMUTIL "\ndsymutil" @@ -230,8 +263,6 @@ extern GTY(()) int darwin_ms_struct; #define STANDARD_STARTFILE_PREFIX_1 "" #define STANDARD_STARTFILE_PREFIX_2 "" -#define DARWIN_PIE_SPEC "%{fpie|pie|fPIE:}" - /* Please keep the random linker options in alphabetical order (modulo 'Z' and 'no' prefixes). Note that options taking arguments may appear multiple times on a command line with different arguments each time, @@ -295,7 +326,6 @@ extern GTY(()) int darwin_ms_struct; %:version-compare(< 10.5 mmacosx-version-min= -multiply_defined) \ %:version-compare(< 10.5 mmacosx-version-min= suppress)}} \ %{Zmultiplydefinedunused*:-multiply_defined_unused %*} \ - " DARWIN_PIE_SPEC " \ %{prebind} %{noprebind} %{nofixprebinding} %{prebind_all_twolevel_modules} \ %{read_only_relocs} \ %{sectcreate*} %{sectorder*} %{seg1addr*} %{segprot*} \ @@ -327,43 +357,42 @@ extern GTY(()) int darwin_ms_struct; /* Support -mmacosx-version-min by supplying different (stub) libgcc_s.dylib libraries to link against, and by not linking against libgcc_s on - earlier-than-10.3.9. + earlier-than-10.3.9. If we need exceptions, prior to 10.3.9, then we have + to link the static eh lib, since there's no shared version on the system. - Note that by default, -lgcc_eh is not linked against! This is - because in a future version of Darwin the EH frame information may - be in a new format, or the fallback routine might be changed; if - you want to explicitly link against the static version of those - routines, because you know you don't need to unwind through system - libraries, you need to explicitly say -static-libgcc. + Note that by default, except as above, -lgcc_eh is not linked against. + This is because,in general, we need to unwind through system libraries that + are linked with the shared unwinder in libunwind (or libgcc_s for 10.4/5). - If it is linked against, it has to be before -lgcc, because it may + The static version of the current libgcc unwinder (which differs from the + implementation in libunwind.dylib on systems Darwin10 [10.6]+) can be used + by specifying -static-libgcc. + + If libgcc_eh is linked against, it has to be before -lgcc, because it might need symbols from -lgcc. */ + #undef REAL_LIBGCC_SPEC #define REAL_LIBGCC_SPEC \ "%{static-libgcc|static: -lgcc_eh -lgcc; \ - shared-libgcc|fexceptions|fgnu-runtime: \ - %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_s.10.4) \ + shared-libgcc|fexceptions|fobjc-exceptions|fgnu-runtime: \ + %:version-compare(!> 10.3.9 mmacosx-version-min= -lgcc_eh) \ + %:version-compare(>< 10.3.9 10.5 mmacosx-version-min= -lgcc_s.10.4) \ %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5) \ - %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \ + %:version-compare(>< 10.3.9 10.5 mmacosx-version-min= -lgcc_ext.10.4) \ %:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \ -lgcc ; \ :%:version-compare(>< 10.3.9 10.5 mmacosx-version-min= -lgcc_s.10.4) \ %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lgcc_s.10.5) \ - %:version-compare(!> 10.5 mmacosx-version-min= -lgcc_ext.10.4) \ + %:version-compare(>< 10.3.9 10.5 mmacosx-version-min= -lgcc_ext.10.4) \ %:version-compare(>= 10.5 mmacosx-version-min= -lgcc_ext.10.5) \ -lgcc }" -/* We specify crt0.o as -lcrt0.o so that ld will search the library path. - - crt3.o provides __cxa_atexit on systems that don't have it. Since - it's only used with C++, which requires passing -shared-libgcc, key - off that to avoid unnecessarily adding a destructor to every - powerpc program built. */ +/* We specify crt0.o as -lcrt0.o so that ld will search the library path. */ #undef STARTFILE_SPEC #define STARTFILE_SPEC \ - "%{Zdynamiclib: %(darwin_dylib1) %{fgnu-tm: -lcrttms.o}} \ - %{!Zdynamiclib:%{Zbundle:%{!static: \ +"%{Zdynamiclib: %(darwin_dylib1) %{fgnu-tm: -lcrttms.o}} \ + %{!Zdynamiclib:%{Zbundle:%{!static: \ %:version-compare(< 10.6 mmacosx-version-min= -lbundle1.o) \ %{fgnu-tm: -lcrttms.o}}} \ %{!Zbundle:%{pg:%{static:-lgcrt0.o} \ @@ -377,7 +406,7 @@ extern GTY(()) int darwin_ms_struct; %{!object:%{preload:-lcrt0.o} \ %{!preload: %(darwin_crt1) \ %(darwin_crt2)}}}}}} \ - %{shared-libgcc:%:version-compare(< 10.5 mmacosx-version-min= crt3.o%s)}" + %(darwin_crt3)" /* We want a destructor last in the list. */ #define TM_DESTRUCTOR "%{fgnu-tm: -lcrttme.o}" @@ -385,18 +414,30 @@ extern GTY(()) int darwin_ms_struct; #define DARWIN_EXTRA_SPECS \ { "darwin_crt1", DARWIN_CRT1_SPEC }, \ + { "darwin_crt2", DARWIN_CRT2_SPEC }, \ + { "darwin_crt3", DARWIN_CRT3_SPEC }, \ { "darwin_dylib1", DARWIN_DYLIB1_SPEC }, -#define DARWIN_DYLIB1_SPEC \ - "%:version-compare(!> 10.5 mmacosx-version-min= -ldylib1.o) \ - %:version-compare(>< 10.5 10.6 mmacosx-version-min= -ldylib1.10.5.o)" - #define DARWIN_CRT1_SPEC \ "%:version-compare(!> 10.5 mmacosx-version-min= -lcrt1.o) \ %:version-compare(>< 10.5 10.6 mmacosx-version-min= -lcrt1.10.5.o) \ %:version-compare(>< 10.6 10.8 mmacosx-version-min= -lcrt1.10.6.o) \ %{fgnu-tm: -lcrttms.o}" +#define DARWIN_CRT2_SPEC "" + +/* crt3.o provides __cxa_atexit on systems that don't have it (and a fix + up for faulty versions on 10.4). Since it's only used with C++, which + requires passing -shared-libgcc, key off that to avoid unnecessarily + adding a destructor to every program built for 10.4 or earlier. */ + +#define DARWIN_CRT3_SPEC \ +"%{shared-libgcc:%:version-compare(< 10.5 mmacosx-version-min= crt3.o%s)}" + +#define DARWIN_DYLIB1_SPEC \ + "%:version-compare(!> 10.5 mmacosx-version-min= -ldylib1.o) \ + %:version-compare(>< 10.5 10.6 mmacosx-version-min= -ldylib1.10.5.o)" + #ifdef HAVE_AS_MMACOSX_VERSION_MIN_OPTION /* Emit macosx version (but only major). */ #define ASM_MMACOSX_VERSION_MIN_SPEC \ @@ -762,21 +803,52 @@ extern GTY(()) section * darwin_sections[NUM_DARWIN_SECTIONS]; #undef TARGET_ASM_MARK_DECL_PRESERVED #define TARGET_ASM_MARK_DECL_PRESERVED darwin_mark_decl_preserved -/* Set on a symbol with SYMBOL_FLAG_FUNCTION or - MACHO_SYMBOL_FLAG_VARIABLE to indicate that the function or - variable has been defined in this translation unit. - When porting Mach-O to new architectures you need to make - sure these aren't clobbered by the backend. */ +/* Any port using this header needs to define the first available + subtarget symbol bit: SYMBOL_FLAG_SUBT_DEP. */ -#define MACHO_SYMBOL_FLAG_VARIABLE (SYMBOL_FLAG_MACH_DEP) -#define MACHO_SYMBOL_FLAG_DEFINED ((SYMBOL_FLAG_MACH_DEP) << 1) +/* Is a variable. */ +#define MACHO_SYMBOL_FLAG_VARIABLE (SYMBOL_FLAG_SUBT_DEP) +#define MACHO_SYMBOL_VARIABLE_P(RTX) \ + ((SYMBOL_REF_FLAGS (RTX) & MACHO_SYMBOL_FLAG_VARIABLE) != 0) + +/* Set on a symbol that must be indirected, even when there is a + definition in the TU. The ABI mandates that common symbols are so + indirected, as are weak. If 'fix-and-continue' is operational then + data symbols might also be. */ + +#define MACHO_SYMBOL_FLAG_MUST_INDIRECT ((SYMBOL_FLAG_SUBT_DEP) << 1) +#define MACHO_SYMBOL_MUST_INDIRECT_P(RTX) \ + ((SYMBOL_REF_FLAGS (RTX) & MACHO_SYMBOL_FLAG_MUST_INDIRECT) != 0) + +/* Set on a symbol with SYMBOL_FLAG_FUNCTION or MACHO_SYMBOL_FLAG_VARIABLE + to indicate that the function or variable is considered defined in this + translation unit. */ + +#define MACHO_SYMBOL_FLAG_DEFINED ((SYMBOL_FLAG_SUBT_DEP) << 2) +#define MACHO_SYMBOL_DEFINED_P(RTX) \ + ((SYMBOL_REF_FLAGS (RTX) & MACHO_SYMBOL_FLAG_DEFINED) != 0) + +/* Set on a symbol that has specified non-default visibility. */ + +#define MACHO_SYMBOL_FLAG_HIDDEN_VIS ((SYMBOL_FLAG_SUBT_DEP) << 3) +#define MACHO_SYMBOL_HIDDEN_VIS_P(RTX) \ + ((SYMBOL_REF_FLAGS (RTX) & MACHO_SYMBOL_FLAG_HIDDEN_VIS) != 0) + +/* Set on a symbol that is a pic stub or symbol indirection (i.e. the + L_xxxxx${stub,non_lazy_ptr,lazy_ptr}. */ + +#define MACHO_SYMBOL_FLAG_INDIRECTION ((SYMBOL_FLAG_SUBT_DEP) << 5) +#define MACHO_SYMBOL_INDIRECTION_P(RTX) \ + ((SYMBOL_REF_FLAGS (RTX) & MACHO_SYMBOL_FLAG_INDIRECTION) != 0) /* Set on a symbol to indicate when fix-and-continue style code generation is being used and the symbol refers to a static symbol that should be rebound from new instances of a translation unit to the original instance of the data. */ -#define MACHO_SYMBOL_STATIC ((SYMBOL_FLAG_MACH_DEP) << 2) +#define MACHO_SYMBOL_FLAG_STATIC ((SYMBOL_FLAG_SUBT_DEP) << 6) +#define MACHO_SYMBOL_STATIC_P(RTX) \ + ((SYMBOL_REF_FLAGS (RTX) & MACHO_SYMBOL_FLAG_STATIC) != 0) /* Symbolic names for various things we might know about a symbol. */ @@ -969,8 +1041,12 @@ extern void darwin_driver_init (unsigned int *,struct cl_decoded_option **); _tested_ version known to support this so far. */ #define MIN_LD64_NO_COAL_SECTS "236.4" +/* From at least version 62.1, ld64 can build symbol indirection stubs as + needed, and there is no need for the compiler to emit them. */ +#define MIN_LD64_OMIT_STUBS "85.2" + #ifndef LD64_VERSION -#define LD64_VERSION "85.2" +#define LD64_VERSION "62.1" #else #define DEF_LD64 LD64_VERSION #endif diff --git a/gcc/config/darwin.opt b/gcc/config/darwin.opt index 22667def0a1..7f5616cbe07 100644 --- a/gcc/config/darwin.opt +++ b/gcc/config/darwin.opt @@ -18,240 +18,382 @@ ; along with GCC; see the file COPYING3. If not see ; <http://www.gnu.org/licenses/>. -; Various linker options have a -Z added so that they can get to specs -; processing without interference. Note that an option name with a -; prefix that matches another option name, that also takes an -; argument, being mapped to a -Z linker option, needs to be modified -; so the prefix is different, otherwise a '*' after the shorter option -; will match with the longer one. +; We have a lot of Driver options, many of which are obsolete or very very +; rarely used so, to keep this file easier to manage: + +; Please place all Non-driver options first (in alphabetical order), followed +; by Driver-only options. + +; Non-driver options. + +dependency-file +C ObjC C++ ObjC++ Separate Alias(MF) MissingArgError(missing filename after %qs) + +fapple-kext +Target Report C++ Var(flag_apple_kext) +Generate code for darwin loadable kernel extensions. + +iframework +Target RejectNegative C ObjC C++ ObjC++ Joined Separate +-iframework <dir> Add <dir> to the end of the system framework include path. + +mconstant-cfstrings +Target Report Var(darwin_constant_cfstrings) Init(1) +Generate compile-time CFString objects. + +Wnonportable-cfstrings +Target Report Var(darwin_warn_nonportable_cfstrings) Init(1) Warning +Warn if constant CFString objects contain non-portable characters. + +; Use new-style pic stubs if this is true, x86 only so far. +matt-stubs +Target Report Var(darwin_macho_att_stub) Init(1) +Generate AT&T-style stubs for Mach-O. + +mdynamic-no-pic +Target Common Report Mask(MACHO_DYNAMIC_NO_PIC) +Generate code suitable for executables (NOT shared libs). + +mfix-and-continue +Target Report Var(darwin_fix_and_continue) +Generate code suitable for fast turn around debugging. + +mkernel +Target Report Var(flag_mkernel) +Generate code for the kernel or loadable kernel extensions. + +; The Init here is for the convenience of GCC developers, so that cc1 +; and cc1plus don't crash if no -mmacosx-version-min is passed. The +; driver will always pass a -mmacosx-version-min, so in normal use the +; Init is never used. +mmacosx-version-min= +Target RejectNegative Joined Report Var(darwin_macosx_version_min) Init(DEF_MIN_OSX_VERSION) +The earliest MacOS X version on which this program will run. + +; Really, only relevant to PowerPC which has a 4 byte bool by default. +mone-byte-bool +Target RejectNegative Report Var(darwin_one_byte_bool) +Set sizeof(bool) to 1. + +msymbol-stubs +Target Report Var(darwin_symbol_stubs) Init(0) +Force generation of external symbol indirection stubs. + +; Some code-gen may be improved / adjusted if the linker is sufficiently modern. +mtarget-linker= +Target RejectNegative Joined Report Alias(mtarget-linker) + +mtarget-linker +Target RejectNegative Joined Separate Report Var(darwin_target_linker) Init(LD64_VERSION) +The version of ld64 in use for this toolchain. + +; Driver options. all_load -Driver Alias(Zall_load) +Driver RejectNegative Alias(Zall_load) +Loads all members of archive libraries allowable_client -Driver Separate Alias(Zallowable_client) +Driver RejectNegative Separate Alias(Zallowable_client) +-allowable_client <name> The output dylib is private to the client(s) named arch Driver RejectNegative Separate +-arch <name> Specify that the output file should be generated for architecture \"name\" arch_errors_fatal -Driver Alias(Zarch_errors_fatal) +Driver RejectNegative Alias(Zarch_errors_fatal) +Mismatches between file architecture and the \"-arch\" are errors instead of warnings asm_macosx_version_min= Driver RejectNegative Joined +The earliest MacOS X version on which this program will run (formatted for the assembler) bind_at_load -Driver Alias(Zbind_at_load) +Driver RejectNegative Alias(Zbind_at_load) +Produce an output file that will bind symbols on load, rather than lazily. bundle -Driver Alias(Zbundle) +Driver RejectNegative Alias(Zbundle) +Produce a Mach-O bundle (file type MH_BUNDLE) bundle_loader -Driver Separate Alias(Zbundle_loader) +Driver RejectNegative Separate Alias(Zbundle_loader) +-bundle_loader <executable> Treat \"executable\" (that will be loading this bundle) as if it was one of the dynamic libraries the bundle is linked against for symbol resolution -dead_strip -Driver Alias(Zdead_strip) +client_name +Driver RejectNegative Separate +-client_name <name> Enable the executable being built to link against a private dylib (using allowable_client) -dependency-file -C ObjC C++ ObjC++ Separate Alias(MF) MissingArgError(missing filename after %qs) +compatibility_version +Driver RejectNegative Separate +-compatibility_version <number> Set the minimum version for the client interface. Clients must record a greater number than this or the binding will fail at runtime + +current_version +Driver RejectNegative Separate +-current_version <number> Set the current version for the library. + +dead_strip +Driver RejectNegative Alias(Zdead_strip) +Remove code and data that is unreachable from any exported symbol (including the entry point) dylib_file Driver Separate Alias(Zdylib_file) dylinker -Driver +Driver RejectNegative +Produce a Mach-O dylinker (file type MH_DYLINKER), only used for building dyld. + +dylinker_install_name +Driver RejectNegative Separate +-dylinker_install_name <path> Only used for building dyld. dynamic -Driver Alias(Zdynamic) +Driver RejectNegative Alias(Zdynamic) +The default (and opposite of -static), implied by user mode executables, shared libraries and bundles. dynamiclib -Driver Alias(Zdynamiclib) +Driver RejectNegative Alias(Zdynamiclib) +Produce a Mach-O shared library (file type MH_DYLIB), synonym for -shared exported_symbols_list -Driver Separate Alias(Zexported_symbols_list) +Driver RejectNegative Separate Alias(Zexported_symbols_list) +-exported_symbols_list <filename> Global symbols in \"filename\" will be exported from the linked output file, any symbols not mentioned will be treated as hidden. filelist Driver RejectNegative Separate +Supply a list of objects to be linked from a file, rather than the command line findirect-virtual-calls Driver RejectNegative +Used for generating code for some older kernel revisions. flat_namespace Driver RejectNegative Alias(Zflat_namespace) +Ignore the normal two-level namespace; resolve symbols in command line order and do not record which library provided the resolved symbol. force_cpusubtype_ALL Driver RejectNegative Alias(Zforce_cpusubtype_ALL) +For the assembler (and linker) permit any architecture sub-variant to be used without error. force_flat_namespace Driver RejectNegative Alias(Zforce_flat_namespace) +Set the output object such that, on loading, dyld will ignore any two-level information and resolve symbols in the discovery order for loaded libs. framework Driver RejectNegative Separate +-framework <name> The linker should search for the named framework in the framework search path. fterminated-vtables Driver RejectNegative +Used for generating code for some older kernel revisions. gfull Driver RejectNegative +Abbreviation for \"-g -fno-eliminate-unused-debug-symbols\" gused Driver RejectNegative +Abbreviation for \"-g -feliminate-unused-debug-symbols\" headerpad_max_install_names -Driver +Driver RejectNegative +Automatically adds space for longer path names in load commands (up to MAXPATHLEN) image_base -Driver Separate Alias(Zimage_base) +Driver RejectNegative Separate Alias(Zimage_base) +-image_base <address> Choose a base address for a dylib or bundle. init -Driver Separate Alias(Zinit) +Driver RejectNegative Separate Alias(Zinit) +-init <symbol_name> The symbol \"symbol_name\" will be used as the first initialiser for a dylib. install_name -Driver Separate Alias(Zinstall_name) +Driver RejectNegative Separate Alias(Zinstall_name) +-install_name <name> Set the install name for a dylib. keep_private_externs -Driver - -mconstant-cfstrings -Target Report Var(darwin_constant_cfstrings) Init(1) -Generate compile-time CFString objects. +Driver RejectNegative +Usually \"private extern\" (hidden) symbols are made local when linking, this command suppresses that such that they remain exported. multi_module Driver RejectNegative Alias(Zmulti_module) +(Obsolete after 10.4) Multi modules are ignored at runtime since MacOS 10.4 multiply_defined Driver RejectNegative Separate Alias(Zmultiply_defined) +(Obsolete after 10.4) -multiply_defined <treatment> Provided a mechanism for warning about symbols defined in multiple dylibs. multiply_defined_unused Driver RejectNegative Separate Alias(Zmultiplydefinedunused) +(Obsolete after 10.4) -multiply_defined_unused <treatment> Provided a mechanism for warning about symbols defined in the current executable also being defined in linked dylibs. no_dead_strip_inits_and_terms -Driver Alias(Zno_dead_strip_inits_and_terms) +Driver RejectNegative Alias(Zno_dead_strip_inits_and_terms) +(Obsolete) The linker never dead strips these items, so the option is not needed. nofixprebinding -Driver +Driver RejectNegative +(Obsolete after 10.3.9) Set MH_NOPREFIXBINDING, in an exectuable. nomultidefs -Driver +Driver RejectNegative +(Obsolete after 10.4) Set MH_NOMULTIDEFS in an umbrella framework. noprebind -Driver +Driver RejectNegative Negative(prebind) +(Obsolete) LD_PREBIND is no longer supported. noseglinkedit -Driver +Driver RejectNegative Negative(seglinkedit) +(Obsolete) This is the default. object -Driver +Driver RejectNegative + +pagezero_size +Driver RejectNegative Separate +-pagezero_size size Allows setting the page 0 size to 4kb for certain special cases. prebind -Driver +Driver RejectNegative Negative(noprebind) +(Obsolete) LD_PREBIND is no longer supported. prebind_all_twolevel_modules -Driver +Driver RejectNegative +(Obsolete) LD_PREBIND is no longer supported. preload -Driver +Driver RejectNegative +Produces a Mach-O file suitable for embedded/ROM use. private_bundle -Driver +Driver RejectNegative +(Obsolete) Allowed linking to proceed with \"-flat_namespace\" when a linked bundle contained a symbol also exported from the main executable. pthread -Driver +Driver RejectNegative rdynamic -Driver +Driver RejectNegative +Synonym for \"-export-dynamic\" for linker versions that support it. + +read_only_relocs +Driver RejectNegative Separate +-read_only_relocs <treatment> This will allow relocs in read-only pages (not advisable). + +sectalign +Driver RejectNegative Separate Args(3) +-sectalign <segname> <sectname> <value> Set section \"sectname\" in segment \"segname\" to have alignment \"value\" which must be an integral power of two expressed in hexadecimal form. + +sectcreate +Driver RejectNegative Separate Args(3) +-sectcreate <segname> <sectname> <file> Create section \"sectname\" in segment \"segname\" from the contents of \"file\". + +sectobjectsymbols +Driver RejectNegative Separate Args(2) +(Obsolete) -sectobjectsymbols <segname> <sectname> Setting a local symbol at the start of a section is no longer supported. + +sectorder +Driver RejectNegative Separate Args(3) +(Obsolete) -sectorder <segname> <sectname> orderfile Replaced by a more general option \"-order_file\". seg_addr_table -Driver Separate Alias(Zseg_addr_table) +Driver RejectNegative Separate Alias(Zseg_addr_table) +-seg_addr_table <file> Specify the base addresses for dynamic libraries, \"file\" contains a line for each library. +; This is only usable by the ld_classic linker. seg_addr_table_filename -Driver Separate Alias(Zfn_seg_addr_table_filename) +Driver RejectNegative Separate Alias(Zfn_seg_addr_table_filename) +(Obsolete, ld_classic only) -seg_addr_table_filename <path> + +seg1addr +Driver RejectNegative Separate +Synonym for \"image_base\" segaddr -Driver Separate Args(2) Alias(Zsegaddr) +Driver RejectNegative Separate Args(2) Alias(Zsegaddr) +-segaddr <name> <address> Set the base address of segment \"name\" to \"address\" which must be aligned to a page boundary (currently 4kb). + +; This is only usable by the ld_classic linker. +segcreate +Driver RejectNegative Separate Args(3) +(Obsolete, ld_classic only) -sectcreate segname sectname file seglinkedit -Driver +Driver RejectNegative Negative(noseglinkedit) +(Obsolete) Object files with LINKEDIT sections are no longer supported. + +segprot +Driver RejectNegative Separate Args(3) +-segprot <segname> max_prot init_prot The protection values are \"r\", \"w\", \"x\" or \"-\" the latter meaning \"no access\". segs_read_only_addr -Driver Separate Alias(Zsegs_read_only_addr) +Driver RejectNegative Separate Alias(Zsegs_read_only_addr) +-segs_read_only_addr address Allows specifying the address of the read only portion of a dylib. segs_read_write_addr -Driver Separate Alias(Zsegs_read_write_addr) +Driver RejectNegative Separate Alias(Zsegs_read_write_addr) +-segs_read_write_addr address Allows specifying the address of the read/write portion of a dylib. single_module -Driver Alias(Zsingle_module) +Driver RejectNegative Alias(Zsingle_module) +(Obsolete) This is the default. + +sub_library +Driver RejectNegative Separate +-sub_library <name> Library named \"name\" will be re-exported (only useful for dylibs). + +sub_umbrella +Driver RejectNegative Separate +-sub_umbrella <name> Framework named \"name\" will be re-exported (only useful for dylibs). twolevel_namespace -Driver +Driver RejectNegative +This is the default twolevel_namespace_hints -Driver +Driver RejectNegative +Specifies content that can speed up dynamic loading when the binaries are unchanged. umbrella -Driver Separate Alias(Zumbrella) +Driver RejectNegative Separate Alias(Zumbrella) +-umbrella <framework> The specified framework will be re-exported. + +undefined +Driver RejectNegative Separate +-undefined <treatment> Specify the handling for undefined symbols (default is error). unexported_symbols_list -Driver Separate Alias(Zunexported_symbols_list) +Driver RejectNegative Separate Alias(Zunexported_symbols_list) +-unexported_symbols_list <filename> Don't export global symbols listed in filename. weak_reference_mismatches -Driver Separate Alias(Zweak_reference_mismatches) +Driver RejectNegative Separate Alias(Zweak_reference_mismatches) +-weak_reference_mismatches <treatment> Specifies what to do if a symbol import conflicts between file (weak in one and not in another) the default is to treat the symbol as non-weak. whatsloaded -Driver +Driver RejectNegative +Logs the object files the linker loads whyload -Driver +Driver RejectNegative +Logs which symbol(s) caused an object to be loaded. + +;(Obsolete, ignored) Strip symbols starting with "L", this is the default. +X +Driver RejectNegative y -Driver Joined +Driver RejectNegative Joined +(Obsolete, ignored) Old support similar to whyload. Mach -Driver - -Wnonportable-cfstrings -Target Report Var(darwin_warn_nonportable_cfstrings) Init(1) Warning -Warn if constant CFString objects contain non-portable characters. - -; Use new-style pic stubs if this is true, x86 only so far. -matt-stubs -Target Report Var(darwin_macho_att_stub) Init(1) -Generate AT&T-style stubs for Mach-O. - -mdynamic-no-pic -Target Common Report Mask(MACHO_DYNAMIC_NO_PIC) -Generate code suitable for executables (NOT shared libs). - -mfix-and-continue -Target Report Var(darwin_fix_and_continue) -Generate code suitable for fast turn around debugging. - -; The Init here is for the convenience of GCC developers, so that cc1 -; and cc1plus don't crash if no -mmacosx-version-min is passed. The -; driver will always pass a -mmacosx-version-min, so in normal use the -; Init is never used. -mmacosx-version-min= -Target Joined Report Var(darwin_macosx_version_min) Init(DEF_MIN_OSX_VERSION) -The earliest MacOS X version on which this program will run. - -mone-byte-bool -Target RejectNegative Report Var(darwin_one_byte_bool) -Set sizeof(bool) to 1. - -fapple-kext -Target Report C++ Var(flag_apple_kext) -Generate code for darwin loadable kernel extensions. - -mkernel -Target Report Var(flag_mkernel) -Generate code for the kernel or loadable kernel extensions. - -iframework -Target RejectNegative C ObjC C++ ObjC++ Joined Separate --iframework <dir> Add <dir> to the end of the system framework include path. +Driver RejectNegative +(Obsolete and unhandled by ld64, ignored) ld should produce an executable (only handled by ld_classic). -X -Driver +;; These are not "real" options, but placeholders used to hide the real options +;; from generic options processing... FIXME: they can be eliminated now. Zall_load Driver @@ -343,62 +485,3 @@ Driver Separate Zweak_reference_mismatches Driver Separate -client_name -Driver Separate - -compatibility_version -Driver Separate - -current_version -Driver Separate - -dylinker_install_name -Driver Separate - -pagezero_size -Driver Separate - -read_only_relocs -Driver Separate - -sectalign -Driver Separate Args(3) - -sectcreate -Driver Separate Args(3) - -sectobjectsymbols -Driver Separate Args(2) - -sectorder -Driver Separate Args(3) - -seg1addr -Driver Separate - -segcreate -Driver Separate Args(3) - -segprot -Driver Separate Args(3) - -segs_read_only_addr -Driver Separate - -segs_read_write_addr -Driver Separate - -sub_library -Driver Separate - -sub_umbrella -Driver Separate - -; Certain aspects of code-gen may be improved / adjusted if the version of ld64 -; is sufficiently modern. -mtarget-linker -Target RejectNegative Joined Separate Report Var(darwin_target_linker) Init(LD64_VERSION) -The version of ld64 in use for this toolchain. - -undefined -Driver Separate diff --git a/gcc/config/darwin10.h b/gcc/config/darwin10.h index 07a2b465a04..a6d1eba6ebd 100644 --- a/gcc/config/darwin10.h +++ b/gcc/config/darwin10.h @@ -18,17 +18,12 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* Fix PR41260 by passing -no_compact_unwind on darwin10 and later until - unwinder in libSystem is fixed to digest new epilog unwinding notes. +/* Fix PR47558 by linking against libSystem ahead of libgcc_ext. */ - Fix PR47558 by linking against libSystem ahead of libgcc_ext. */ #undef LINK_GCC_C_SEQUENCE_SPEC #define LINK_GCC_C_SEQUENCE_SPEC \ -"%:version-compare(>= 10.6 mmacosx-version-min= -no_compact_unwind) \ - %{!static:%{!static-libgcc: \ +"%{!static:%{!static-libgcc: \ %:version-compare(>= 10.6 mmacosx-version-min= -lSystem) } } \ - %{fno-pic|fno-PIC|fno-pie|fno-PIE|fapple-kext|mkernel|static|mdynamic-no-pic: \ - %:version-compare(>= 10.7 mmacosx-version-min= -no_pie) } \ %{!nostdlib:%:version-compare(>< 10.6 10.7 mmacosx-version-min= -ld10-uwfef.o)} \ %G %{!nolibc:%L}" diff --git a/gcc/config/darwin9.h b/gcc/config/darwin9.h index ca5c51718c8..1fd1604aed8 100644 --- a/gcc/config/darwin9.h +++ b/gcc/config/darwin9.h @@ -35,12 +35,6 @@ along with GCC; see the file COPYING3. If not see /* Tell collect2 to run dsymutil for us as necessary. */ #define COLLECT_RUN_DSYMUTIL 1 -#undef DARWIN_PIE_SPEC -#define DARWIN_PIE_SPEC \ - "%{fpie|pie|fPIE: \ - %{mdynamic-no-pic: %n'-mdynamic-no-pic' overrides '-pie', '-fpie' or '-fPIE'; \ - :-pie}}" - /* Only ask as for debug data if the debug style is stabs (since as doesn't yet generate dwarf.) */ diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h index 7606efdbf79..372d77f84fe 100644 --- a/gcc/config/i386/avx2intrin.h +++ b/gcc/config/i386/avx2intrin.h @@ -258,7 +258,7 @@ extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmpgt_epi8 (__m256i __A, __m256i __B) { - return (__m256i) ((__v32qi)__A > (__v32qi)__B); + return (__m256i) ((__v32qs)__A > (__v32qs)__B); } extern __inline __m256i diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index 29115a11a0d..3301451e4de 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -47,6 +47,7 @@ typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))); typedef short __v16hi __attribute__ ((__vector_size__ (32))); typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))); typedef char __v32qi __attribute__ ((__vector_size__ (32))); +typedef signed char __v32qs __attribute__ ((__vector_size__ (32))); typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))); /* The Intel API is flexible enough that we must allow aliasing with other diff --git a/gcc/config/i386/darwin.h b/gcc/config/i386/darwin.h index 1e96350fa71..bdb36f00959 100644 --- a/gcc/config/i386/darwin.h +++ b/gcc/config/i386/darwin.h @@ -90,14 +90,12 @@ along with GCC; see the file COPYING3. If not see #undef WCHAR_TYPE_SIZE #define WCHAR_TYPE_SIZE 32 -/* Generate branch islands stubs if this is true. */ -extern int darwin_emit_branch_islands; - -#undef TARGET_MACHO_BRANCH_ISLANDS -#define TARGET_MACHO_BRANCH_ISLANDS darwin_emit_branch_islands +/* Generate pic symbol indirection stubs if this is true. */ +#undef TARGET_MACHO_SYMBOL_STUBS +#define TARGET_MACHO_SYMBOL_STUBS (darwin_symbol_stubs) /* For compatibility with OSX system tools, use the new style of pic stub - if this is set. */ + if this is set (default). */ #undef MACHOPIC_ATT_STUB #define MACHOPIC_ATT_STUB (darwin_macho_att_stub) @@ -245,7 +243,7 @@ extern int darwin_emit_branch_islands; #undef FUNCTION_PROFILER #define FUNCTION_PROFILER(FILE, LABELNO) \ do { \ - if (TARGET_MACHO_BRANCH_ISLANDS \ + if (TARGET_MACHO_SYMBOL_STUBS \ && MACHOPIC_INDIRECT && !TARGET_64BIT) \ { \ const char *name = machopic_mcount_stub_name (); \ @@ -326,10 +324,8 @@ extern int darwin_emit_branch_islands; } \ } -/* This needs to move since i386 uses the first flag and other flags are - used in Mach-O. */ -#undef MACHO_SYMBOL_FLAG_VARIABLE -#define MACHO_SYMBOL_FLAG_VARIABLE ((SYMBOL_FLAG_MACH_DEP) << 3) +/* First available SYMBOL flag bit for use by subtargets. */ +#define SYMBOL_FLAG_SUBT_DEP (SYMBOL_FLAG_MACH_DEP << 5) #undef MACHOPIC_NL_SYMBOL_PTR_SECTION #define MACHOPIC_NL_SYMBOL_PTR_SECTION \ diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index f9e7b33b0dd..1683d842c65 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -1308,7 +1308,7 @@ _mm_xor_si128 (__m128i __A, __m128i __B) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_epi8 (__m128i __A, __m128i __B) { - return (__m128i) ((__v16qs)__A == (__v16qs)__B); + return (__m128i) ((__v16qi)__A == (__v16qi)__B); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2b37296e537..ce3fbd120ed 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -5423,7 +5423,25 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], ret = false; } else - p_strings[opt] = xstrdup (p + opt_len); + { + p_strings[opt] = xstrdup (p + opt_len); + if (opt == IX86_FUNCTION_SPECIFIC_ARCH) + { + /* If arch= is set, clear all bits in x_ix86_isa_flags, + except for ISA_64BIT, ABI_64, ABI_X32, and CODE16 + and all bits in x_ix86_isa_flags2. */ + opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT + | OPTION_MASK_ABI_64 + | OPTION_MASK_ABI_X32 + | OPTION_MASK_CODE16); + opts->x_ix86_isa_flags_explicit &= (OPTION_MASK_ISA_64BIT + | OPTION_MASK_ABI_64 + | OPTION_MASK_ABI_X32 + | OPTION_MASK_CODE16); + opts->x_ix86_isa_flags2 = 0; + opts->x_ix86_isa_flags2_explicit = 0; + } + } } else if (type == ix86_opt_enum) @@ -5498,18 +5516,8 @@ ix86_valid_target_attribute_tree (tree args, /* If we are using the default tune= or arch=, undo the string assigned, and use the default. */ if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) - { - opts->x_ix86_arch_string - = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]); - - /* If arch= is set, clear all bits in x_ix86_isa_flags, - except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */ - opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT - | OPTION_MASK_ABI_64 - | OPTION_MASK_ABI_X32 - | OPTION_MASK_CODE16); - opts->x_ix86_isa_flags2 = 0; - } + opts->x_ix86_arch_string + = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]); else if (!orig_arch_specified) opts->x_ix86_arch_string = NULL; @@ -16960,7 +16968,7 @@ output_pic_addr_const (FILE *file, rtx x, int code) break; case SYMBOL_REF: - if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS) + if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS) output_addr_const (file, x); else { diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2cb16d9fbf6..934352d6331 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -640,7 +640,7 @@ extern tree x86_mfence; /* Replace MACH-O, ifdefs by in-line tests, where possible. (a) Macros defined in config/i386/darwin.h */ #define TARGET_MACHO 0 -#define TARGET_MACHO_BRANCH_ISLANDS 0 +#define TARGET_MACHO_SYMBOL_STUBS 0 #define MACHOPIC_ATT_STUB 0 /* (b) Macros defined in config/darwin.h */ #define MACHO_DYNAMIC_NO_PIC_P 0 @@ -2380,7 +2380,7 @@ const wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA; const wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI | PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG - | PTA_RDPID | PTA_CLWB; + | PTA_RDPID | PTA_CLWB | PTA_AVX512VPOPCNTDQ; const wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT | PTA_PCONFIG | PTA_WBNOINVD; const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER diff --git a/gcc/config/i386/intelmic-mkoffload.c b/gcc/config/i386/intelmic-mkoffload.c index 2a3e912ee74..0b12edc72c1 100644 --- a/gcc/config/i386/intelmic-mkoffload.c +++ b/gcc/config/i386/intelmic-mkoffload.c @@ -453,8 +453,6 @@ prepare_target_image (const char *target_compiler, int argc, char **argv) if (verbose) obstack_ptr_grow (&argv_obstack, "-v"); obstack_ptr_grow (&argv_obstack, "-xlto"); - obstack_ptr_grow (&argv_obstack, "-shared"); - obstack_ptr_grow (&argv_obstack, "-fPIC"); obstack_ptr_grow (&argv_obstack, opt1); for (int i = 1; i < argc; i++) { @@ -466,6 +464,9 @@ prepare_target_image (const char *target_compiler, int argc, char **argv) if (!out_obj_filename) fatal_error (input_location, "output file not specified"); obstack_ptr_grow (&argv_obstack, opt2); + /* NB: Put -fPIC and -shared the last to create shared library. */ + obstack_ptr_grow (&argv_obstack, "-fPIC"); + obstack_ptr_grow (&argv_obstack, "-shared"); obstack_ptr_grow (&argv_obstack, "-o"); obstack_ptr_grow (&argv_obstack, target_so_filename); compile_for_target (&argv_obstack); diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 152b7cc77f1..c1b7ce99125 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2625,7 +2625,7 @@ ;; Modes handled by reduc_sm{in,ax}* patterns. (define_mode_iterator REDUC_SSE_SMINMAX_MODE [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE") - (V2DI "TARGET_SSE") (V4SI "TARGET_SSE") (V8HI "TARGET_SSE") + (V2DI "TARGET_SSE4_2") (V4SI "TARGET_SSE") (V8HI "TARGET_SSE") (V16QI "TARGET_SSE")]) (define_expand "reduc_<code>_scal_<mode>" diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 2dfc7c73cce..d758fbf1be6 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -4849,7 +4849,7 @@ mips_split_move (rtx dest, rtx src, enum mips_split_type split_type, rtx insn_) can forward SRC for DEST. This is most useful if the next insn is a simple store. */ rtx_insn *insn = (rtx_insn *)insn_; - struct mips_address_info addr; + struct mips_address_info addr = {}; if (insn) { rtx_insn *next = next_nonnote_nondebug_insn_bb (insn); @@ -4862,7 +4862,7 @@ mips_split_move (rtx dest, rtx src, enum mips_split_type split_type, rtx insn_) { rtx tmp = XEXP (src, 0); mips_classify_address (&addr, tmp, GET_MODE (tmp), true); - if (REGNO (addr.reg) != REGNO (dest)) + if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg)) validate_change (next, &SET_SRC (set), src, false); } else diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index a9b0c86be07..3cfb1a751e9 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -5836,8 +5836,8 @@ "ISA_HAS_ROR" { if (CONST_INT_P (operands[2])) - gcc_assert (INTVAL (operands[2]) >= 0 - && INTVAL (operands[2]) < GET_MODE_BITSIZE (<MODE>mode)); + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (<MODE>mode) - 1)); return "<d>ror\t%0,%1,%2"; } diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index 707ae5822c3..6d75f26e02f 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -7856,7 +7856,7 @@ pa_attr_length_call (rtx_insn *insn, int sibcall) /* 64-bit plabel sequence. */ else if (TARGET_64BIT && !local_call) - length += sibcall ? 28 : 24; + length += 24; /* non-pic long absolute branch sequence. */ else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic) @@ -7928,38 +7928,24 @@ pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall) xoperands[0] = pa_get_deferred_plabel (call_dest); xoperands[1] = gen_label_rtx (); - /* If this isn't a sibcall, we put the load of %r27 into the - delay slot. We can't do this in a sibcall as we don't - have a second call-clobbered scratch register available. - We don't need to do anything when generating fast indirect - calls. */ - if (seq_length != 0 && !sibcall) + /* Put the load of %r27 into the delay slot. We don't need to + do anything when generating fast indirect calls. */ + if (seq_length != 0) { final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, NULL); /* Now delete the delay insn. */ SET_INSN_DELETED (NEXT_INSN (insn)); - seq_length = 0; } output_asm_insn ("addil LT'%0,%%r27", xoperands); output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands); output_asm_insn ("ldd 0(%%r1),%%r1", xoperands); - - if (sibcall) - { - output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); - output_asm_insn ("ldd 16(%%r1),%%r1", xoperands); - output_asm_insn ("bve (%%r1)", xoperands); - } - else - { - output_asm_insn ("ldd 16(%%r1),%%r2", xoperands); - output_asm_insn ("bve,l (%%r2),%%r2", xoperands); - output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); - seq_length = 1; - } + output_asm_insn ("ldd 16(%%r1),%%r2", xoperands); + output_asm_insn ("bve,l (%%r2),%%r2", xoperands); + output_asm_insn ("ldd 24(%%r1),%%r27", xoperands); + seq_length = 1; } else { @@ -8052,20 +8038,22 @@ pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall) { output_asm_insn ("addil LT'%0,%%r19", xoperands); output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands); - output_asm_insn ("ldw 0(%%r1),%%r1", xoperands); + output_asm_insn ("ldw 0(%%r1),%%r22", xoperands); } else { output_asm_insn ("addil LR'%0-$global$,%%r27", xoperands); - output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1", + output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22", xoperands); } - output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands); - output_asm_insn ("depi 0,31,2,%%r1", xoperands); - output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands); - output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands); + output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); + output_asm_insn ("depi 0,31,2,%%r22", xoperands); + /* Should this be an ordered load to ensure the target + address is loaded before the global pointer? */ + output_asm_insn ("ldw 0(%%r22),%%r1", xoperands); + output_asm_insn ("ldw 4(%%r22),%%r19", xoperands); if (!sibcall && !TARGET_PA_20) { @@ -8158,10 +8146,6 @@ pa_attr_length_indirect_call (rtx_insn *insn) if (TARGET_PORTABLE_RUNTIME) return 16; - /* Inline version of $$dyncall. */ - if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size) - return 20; - if (!TARGET_LONG_CALLS && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000) || distance < MAX_PCREL17F_OFFSET)) @@ -8171,12 +8155,15 @@ pa_attr_length_indirect_call (rtx_insn *insn) if (!flag_pic) return 12; - /* Inline version of $$dyncall. */ - if (TARGET_NO_SPACE_REGS || TARGET_PA_20) - return 20; - + /* Inline versions of $$dyncall. */ if (!optimize_size) - return 36; + { + if (TARGET_NO_SPACE_REGS) + return 28; + + if (TARGET_PA_20) + return 32; + } /* Long PIC pc-relative call. */ return 20; @@ -8214,22 +8201,6 @@ pa_output_indirect_call (rtx_insn *insn, rtx call_dest) return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)"; } - /* Maybe emit a fast inline version of $$dyncall. */ - if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size) - { - output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t" - "ldw 2(%%r22),%%r19\n\t" - "ldw -2(%%r22),%%r22", xoperands); - pa_output_arg_descriptor (insn); - if (TARGET_NO_SPACE_REGS) - { - if (TARGET_PA_20) - return "bve,l,n (%%r22),%%r2\n\tnop"; - return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; - } - return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)"; - } - /* Now the normal case -- we can reach $$dyncall directly or we're sure that we can get there via a long-branch stub. @@ -8258,35 +8229,40 @@ pa_output_indirect_call (rtx_insn *insn, rtx call_dest) return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2"; } - /* Maybe emit a fast inline version of $$dyncall. The long PIC - pc-relative call sequence is five instructions. The inline PA 2.0 - version of $$dyncall is also five instructions. The PA 1.X versions - are longer but still an overall win. */ - if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size) + /* The long PIC pc-relative call sequence is five instructions. So, + let's use an inline version of $$dyncall when the calling sequence + has a roughly similar number of instructions and we are not optimizing + for size. We need two instructions to load the return pointer plus + the $$dyncall implementation. */ + if (!optimize_size) { - output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t" - "ldw 2(%%r22),%%r19\n\t" - "ldw -2(%%r22),%%r22", xoperands); if (TARGET_NO_SPACE_REGS) { pa_output_arg_descriptor (insn); - if (TARGET_PA_20) - return "bve,l,n (%%r22),%%r2\n\tnop"; - return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; + output_asm_insn ("bl .+8,%%r2\n\t" + "ldo 20(%%r2),%%r2\n\t" + "extru,<> %%r22,30,1,%%r0\n\t" + "bv,n %%r0(%%r22)\n\t" + "ldw -2(%%r22),%%r21\n\t" + "bv %%r0(%%r21)\n\t" + "ldw 2(%%r22),%%r19", xoperands); + return ""; } if (TARGET_PA_20) { pa_output_arg_descriptor (insn); - return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)"; + output_asm_insn ("bl .+8,%%r2\n\t" + "ldo 24(%%r2),%%r2\n\t" + "stw %%r2,-24(%%sp)\n\t" + "extru,<> %r22,30,1,%%r0\n\t" + "bve,n (%%r22)\n\t" + "ldw -2(%%r22),%%r21\n\t" + "bve (%%r21)\n\t" + "ldw 2(%%r22),%%r19", xoperands); + return ""; } - output_asm_insn ("bl .+8,%%r2\n\t" - "ldo 16(%%r2),%%r2\n\t" - "ldsid (%%r22),%%r1\n\t" - "mtsp %%r1,%%sr0", xoperands); - pa_output_arg_descriptor (insn); - return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)"; } - + /* We need a long PIC call to $$dyncall. */ xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall"); xoperands[1] = gen_rtx_REG (Pmode, 2); @@ -10048,7 +10024,7 @@ pa_modes_tieable_p (machine_mode mode1, machine_mode mode2) /* Length in units of the trampoline instruction code. */ -#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40)) +#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48)) /* Output assembler code for a block containing the constant parts @@ -10069,27 +10045,46 @@ pa_asm_trampoline_template (FILE *f) { if (!TARGET_64BIT) { - fputs ("\tldw 36(%r22),%r21\n", f); - fputs ("\tbb,>=,n %r21,30,.+16\n", f); - if (ASSEMBLER_DIALECT == 0) - fputs ("\tdepi 0,31,2,%r21\n", f); - else - fputs ("\tdepwi 0,31,2,%r21\n", f); - fputs ("\tldw 4(%r21),%r19\n", f); - fputs ("\tldw 0(%r21),%r21\n", f); if (TARGET_PA_20) { - fputs ("\tbve (%r21)\n", f); - fputs ("\tldw 40(%r22),%r29\n", f); + fputs ("\tmfia %r20\n", f); + fputs ("\tldw 48(%r20),%r22\n", f); + fputs ("\tcopy %r22,%r21\n", f); + fputs ("\tbb,>=,n %r22,30,.+16\n", f); + fputs ("\tdepwi 0,31,2,%r22\n", f); + fputs ("\tldw 0(%r22),%r21\n", f); + fputs ("\tldw 4(%r22),%r19\n", f); + fputs ("\tbve (%r21)\n", f); + fputs ("\tldw 52(%r1),%r29\n", f); + fputs ("\t.word 0\n", f); fputs ("\t.word 0\n", f); fputs ("\t.word 0\n", f); } else { + if (ASSEMBLER_DIALECT == 0) + { + fputs ("\tbl .+8,%r20\n", f); + fputs ("\tdepi 0,31,2,%r20\n", f); + } + else + { + fputs ("\tb,l .+8,%r20\n", f); + fputs ("\tdepwi 0,31,2,%r20\n", f); + } + fputs ("\tldw 40(%r20),%r22\n", f); + fputs ("\tcopy %r22,%r21\n", f); + fputs ("\tbb,>=,n %r22,30,.+16\n", f); + if (ASSEMBLER_DIALECT == 0) + fputs ("\tdepi 0,31,2,%r22\n", f); + else + fputs ("\tdepwi 0,31,2,%r22\n", f); + fputs ("\tldw 0(%r22),%r21\n", f); + fputs ("\tldw 4(%r22),%r19\n", f); fputs ("\tldsid (%r21),%r1\n", f); fputs ("\tmtsp %r1,%sr0\n", f); - fputs ("\tbe 0(%sr0,%r21)\n", f); - fputs ("\tldw 40(%r22),%r29\n", f); + fputs ("\tbe 0(%sr0,%r21)\n", f); + fputs ("\tldw 44(%r20),%r29\n", f); } fputs ("\t.word 0\n", f); fputs ("\t.word 0\n", f); @@ -10103,11 +10098,11 @@ pa_asm_trampoline_template (FILE *f) fputs ("\t.dword 0\n", f); fputs ("\t.dword 0\n", f); fputs ("\tmfia %r31\n", f); - fputs ("\tldd 24(%r31),%r1\n", f); - fputs ("\tldd 24(%r1),%r27\n", f); - fputs ("\tldd 16(%r1),%r1\n", f); - fputs ("\tbve (%r1)\n", f); + fputs ("\tldd 24(%r31),%r27\n", f); fputs ("\tldd 32(%r31),%r31\n", f); + fputs ("\tldd 16(%r27),%r1\n", f); + fputs ("\tbve (%r1)\n", f); + fputs ("\tldd 24(%r27),%r27\n", f); fputs ("\t.dword 0 ; fptr\n", f); fputs ("\t.dword 0 ; static link\n", f); } @@ -10117,10 +10112,10 @@ pa_asm_trampoline_template (FILE *f) FNADDR is an RTX for the address of the function's pure code. CXT is an RTX for the static chain value for the function. - Move the function address to the trampoline template at offset 36. - Move the static chain value to trampoline template at offset 40. - Move the trampoline address to trampoline template at offset 44. - Move r19 to trampoline template at offset 48. The latter two + Move the function address to the trampoline template at offset 48. + Move the static chain value to trampoline template at offset 52. + Move the trampoline address to trampoline template at offset 56. + Move r19 to trampoline template at offset 60. The latter two words create a plabel for the indirect call to the trampoline. A similar sequence is used for the 64-bit port but the plabel is @@ -10146,15 +10141,15 @@ pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) if (!TARGET_64BIT) { - tmp = adjust_address (m_tramp, Pmode, 36); + tmp = adjust_address (m_tramp, Pmode, 48); emit_move_insn (tmp, fnaddr); - tmp = adjust_address (m_tramp, Pmode, 40); + tmp = adjust_address (m_tramp, Pmode, 52); emit_move_insn (tmp, chain_value); /* Create a fat pointer for the trampoline. */ - tmp = adjust_address (m_tramp, Pmode, 44); + tmp = adjust_address (m_tramp, Pmode, 56); emit_move_insn (tmp, r_tramp); - tmp = adjust_address (m_tramp, Pmode, 48); + tmp = adjust_address (m_tramp, Pmode, 60); emit_move_insn (tmp, gen_rtx_REG (Pmode, 19)); /* fdc and fic only use registers for the address to flush, @@ -10206,20 +10201,20 @@ pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) } #ifdef HAVE_ENABLE_EXECUTE_STACK - Â emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); #endif } /* Perform any machine-specific adjustment in the address of the trampoline. ADDR contains the address that was passed to pa_trampoline_init. - Adjust the trampoline address to point to the plabel at offset 44. */ + Adjust the trampoline address to point to the plabel at offset 56. */ static rtx pa_trampoline_adjust_address (rtx addr) { if (!TARGET_64BIT) - addr = memory_address (Pmode, plus_constant (Pmode, addr, 46)); + addr = memory_address (Pmode, plus_constant (Pmode, addr, 58)); return addr; } diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h index 561efa51882..24739e56f97 100644 --- a/gcc/config/pa/pa.h +++ b/gcc/config/pa/pa.h @@ -689,7 +689,7 @@ extern int may_call_alloca; /* Length in units of the trampoline for entering a nested function. */ -#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52) +#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 64) /* Alignment required by the trampoline. */ @@ -1293,13 +1293,12 @@ do { \ #endif /* The maximum offset in bytes for a PA 1.X pc-relative call to the - head of the preceding stub table. The selected offsets have been - chosen so that approximately one call stub is allocated for every - 86.7 instructions. A long branch stub is two instructions when - not generating PIC code. For HP-UX and ELF targets, PIC stubs are - seven and four instructions, respectively. */ -#define MAX_PCREL17F_OFFSET \ - (flag_pic ? (TARGET_HPUX ? 198164 : 221312) : 240000) + head of the preceding stub table. A long branch stub is two or three + instructions for non-PIC and PIC, respectively. Import stubs are + seven and five instructions for HP-UX and ELF targets, respectively. + The default stub group size for ELF targets is 217856 bytes. + FIXME: We need an option to set the maximum offset. */ +#define MAX_PCREL17F_OFFSET (TARGET_HPUX ? 198164 : 217856) #define NEED_INDICATE_EXEC_STACK 0 diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index 84630ad536d..e273cddc01d 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -10091,23 +10091,55 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (set_attr "length" "4,16")]) ;; PA 2.0 hardware supports out-of-order execution of loads and stores, so -;; we need a memory barrier to enforce program order for memory references. -;; Since we want PA 1.x code to be PA 2.0 compatible, we also need the -;; barrier when generating PA 1.x code. +;; we need memory barriers to enforce program order for memory references +;; when the TLB and PSW O bits are not set. We assume all PA 2.0 systems +;; are weakly ordered since neither HP-UX or Linux set the PSW O bit. Since +;; we want PA 1.x code to be PA 2.0 compatible, we also need barriers when +;; generating PA 1.x code even though all PA 1.x systems are strongly ordered. + +;; When barriers are needed, we use a strongly ordered ldcw instruction as +;; the barrier. Most PA 2.0 targets are cache coherent. In that case, we +;; can use the coherent cache control hint and avoid aligning the ldcw +;; address. In spite of its description, it is not clear that the sync +;; instruction works as a barrier. (define_expand "memory_barrier" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] + [(parallel + [(set (match_dup 0) (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (clobber (match_dup 1))])] "" { - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + /* We don't need a barrier if the target uses ordered memory references. */ + if (TARGET_ORDERED) + FAIL; + operands[1] = gen_reg_rtx (Pmode); + operands[0] = gen_rtx_MEM (BLKmode, operands[1]); MEM_VOLATILE_P (operands[0]) = 1; }) -(define_insn "*memory_barrier" +(define_insn "*memory_barrier_coherent" [(set (match_operand:BLK 0 "" "") - (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))] - "" - "sync" + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (clobber (match_operand 1 "pmode_register_operand" "=r"))] + "TARGET_PA_20 && TARGET_COHERENT_LDCW" + "ldcw,co 0(%%sp),%1" [(set_attr "type" "binary") (set_attr "length" "4")]) + +(define_insn "*memory_barrier_64" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (clobber (match_operand 1 "pmode_register_operand" "=&r"))] + "TARGET_64BIT" + "ldo 15(%%sp),%1\n\tdepd %%r0,63,3,%1\n\tldcw 0(%1),%1" + [(set_attr "type" "binary") + (set_attr "length" "12")]) + +(define_insn "*memory_barrier_32" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER)) + (clobber (match_operand 1 "pmode_register_operand" "=&r"))] + "" + "ldo 15(%%sp),%1\n\t{dep|depw} %%r0,31,3,%1\n\tldcw 0(%1),%1" + [(set_attr "type" "binary") + (set_attr "length" "12")]) diff --git a/gcc/config/pa/pa.opt b/gcc/config/pa/pa.opt index b32b3d8abc0..4cb81109933 100644 --- a/gcc/config/pa/pa.opt +++ b/gcc/config/pa/pa.opt @@ -45,6 +45,10 @@ mcaller-copies Target Report Mask(CALLER_COPIES) Caller copies function arguments passed by hidden reference. +mcoherent-ldcw +Target Report Var(TARGET_COHERENT_LDCW) Init(1) +Use ldcw/ldcd coherent cache-control hint. + mdisable-fpregs Target Report Mask(DISABLE_FPREGS) Disable FP regs. @@ -90,6 +94,10 @@ mno-space-regs Target RejectNegative Report Mask(NO_SPACE_REGS) Disable space regs. +mordered +Target Report Var(TARGET_ORDERED) Init(0) +Assume memory references are ordered and barriers are not needed. + mpa-risc-1-0 Target RejectNegative Generate PA1.0 code. diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 8b510f87df8..5b0bbdd7cb4 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -44,10 +44,10 @@ extern int riscv_const_insns (rtx); extern int riscv_split_const_insns (rtx); extern int riscv_load_store_insns (rtx, rtx_insn *); extern rtx riscv_emit_move (rtx, rtx); -extern bool riscv_split_symbol (rtx, rtx, machine_mode, rtx *); +extern bool riscv_split_symbol (rtx, rtx, machine_mode, rtx *, bool); extern bool riscv_split_symbol_type (enum riscv_symbol_type); extern rtx riscv_unspec_address (rtx, enum riscv_symbol_type); -extern void riscv_move_integer (rtx, rtx, HOST_WIDE_INT); +extern void riscv_move_integer (rtx, rtx, HOST_WIDE_INT, bool); extern bool riscv_legitimize_move (machine_mode, rtx, rtx); extern rtx riscv_subword (rtx, bool); extern bool riscv_split_64bit_move_p (rtx, rtx); diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c index 35219956c80..5cb295d3abb 100644 --- a/gcc/config/riscv/riscv.c +++ b/gcc/config/riscv/riscv.c @@ -508,8 +508,8 @@ riscv_split_integer (HOST_WIDE_INT val, machine_mode mode) unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32); rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode); - riscv_move_integer (hi, hi, hival); - riscv_move_integer (lo, lo, loval); + riscv_move_integer (hi, hi, hival, FALSE); + riscv_move_integer (lo, lo, loval, FALSE); hi = gen_rtx_fmt_ee (ASHIFT, mode, hi, GEN_INT (32)); hi = force_reg (mode, hi); @@ -1021,9 +1021,12 @@ riscv_force_binary (machine_mode mode, enum rtx_code code, rtx x, rtx y) are allowed, copy it into a new register, otherwise use DEST. */ static rtx -riscv_force_temporary (rtx dest, rtx value) +riscv_force_temporary (rtx dest, rtx value, bool in_splitter) { - if (can_create_pseudo_p ()) + /* We can't call gen_reg_rtx from a splitter, because this might realloc + the regno_reg_rtx array, which would invalidate reg rtx pointers in the + combine undo buffer. */ + if (can_create_pseudo_p () && !in_splitter) return force_reg (Pmode, value); else { @@ -1082,7 +1085,7 @@ static rtx riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type) { addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type)); - return riscv_force_temporary (temp, addr); + return riscv_force_temporary (temp, addr, FALSE); } /* Load an entry from the GOT for a TLS GD access. */ @@ -1130,7 +1133,8 @@ static rtx riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym) is guaranteed to be a legitimate address for mode MODE. */ bool -riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) +riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out, + bool in_splitter) { enum riscv_symbol_type symbol_type; @@ -1146,7 +1150,7 @@ riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) case SYMBOL_ABSOLUTE: { rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr)); - high = riscv_force_temporary (temp, high); + high = riscv_force_temporary (temp, high, in_splitter); *low_out = gen_rtx_LO_SUM (Pmode, high, addr); } break; @@ -1205,8 +1209,9 @@ riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) overflow, so we need to force a sign-extension check. */ high = gen_int_mode (CONST_HIGH_PART (offset), Pmode); offset = CONST_LOW_PART (offset); - high = riscv_force_temporary (temp, high); - reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg)); + high = riscv_force_temporary (temp, high, FALSE); + reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg), + FALSE); } return plus_constant (Pmode, reg, offset); } @@ -1315,7 +1320,7 @@ riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, return riscv_legitimize_tls_address (x); /* See if the address can split into a high part and a LO_SUM. */ - if (riscv_split_symbol (NULL, x, mode, &addr)) + if (riscv_split_symbol (NULL, x, mode, &addr, FALSE)) return riscv_force_address (addr, mode); /* Handle BASE + OFFSET using riscv_add_offset. */ @@ -1337,17 +1342,23 @@ riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, /* Load VALUE into DEST. TEMP is as for riscv_force_temporary. */ void -riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value) +riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value, + bool in_splitter) { struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS]; machine_mode mode; int i, num_ops; rtx x; + /* We can't call gen_reg_rtx from a splitter, because this might realloc + the regno_reg_rtx array, which would invalidate reg rtx pointers in the + combine undo buffer. */ + bool can_create_pseudo = can_create_pseudo_p () && ! in_splitter; + mode = GET_MODE (dest); num_ops = riscv_build_integer (codes, value, mode); - if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */ + if (can_create_pseudo && num_ops > 2 /* not a simple constant */ && num_ops >= riscv_split_integer_cost (value)) x = riscv_split_integer (value, mode); else @@ -1357,7 +1368,7 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value) for (i = 1; i < num_ops; i++) { - if (!can_create_pseudo_p ()) + if (!can_create_pseudo) x = riscv_emit_set (temp, x); else x = force_reg (mode, x); @@ -1381,12 +1392,12 @@ riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src) /* Split moves of big integers into smaller pieces. */ if (splittable_const_int_operand (src, mode)) { - riscv_move_integer (dest, dest, INTVAL (src)); + riscv_move_integer (dest, dest, INTVAL (src), FALSE); return; } /* Split moves of symbolic constants into high/low pairs. */ - if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src)) + if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src, FALSE)) { riscv_emit_set (dest, src); return; @@ -1407,7 +1418,7 @@ riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src) if (offset != const0_rtx && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ())) { - base = riscv_force_temporary (dest, base); + base = riscv_force_temporary (dest, base, FALSE); riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset))); return; } @@ -1416,7 +1427,7 @@ riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src) /* When using explicit relocs, constant pool references are sometimes not legitimate addresses. */ - riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0)); + riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0), FALSE); riscv_emit_move (dest, src); } diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index a8bac170e72..e40535c9e40 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -1051,7 +1051,9 @@ "@ # lwu\t%0,%1" - "&& reload_completed && REG_P (operands[1])" + "&& reload_completed + && REG_P (operands[1]) + && !paradoxical_subreg_p (operands[0])" [(set (match_dup 0) (ashift:DI (match_dup 1) (const_int 32))) (set (match_dup 0) @@ -1068,7 +1070,9 @@ "@ # lhu\t%0,%1" - "&& reload_completed && REG_P (operands[1])" + "&& reload_completed + && REG_P (operands[1]) + && !paradoxical_subreg_p (operands[0])" [(set (match_dup 0) (ashift:GPR (match_dup 1) (match_dup 2))) (set (match_dup 0) @@ -1117,7 +1121,9 @@ "@ # l<SHORT:size>\t%0,%1" - "&& reload_completed && REG_P (operands[1])" + "&& reload_completed + && REG_P (operands[1]) + && !paradoxical_subreg_p (operands[0])" [(set (match_dup 0) (ashift:SI (match_dup 1) (match_dup 2))) (set (match_dup 0) (ashiftrt:SI (match_dup 0) (match_dup 2)))] { @@ -1278,7 +1284,7 @@ "" [(const_int 0)] { - riscv_move_integer (operands[2], operands[0], INTVAL (operands[1])); + riscv_move_integer (operands[2], operands[0], INTVAL (operands[1]), TRUE); DONE; }) @@ -1287,11 +1293,11 @@ [(set (match_operand:P 0 "register_operand") (match_operand:P 1)) (clobber (match_operand:P 2 "register_operand"))] - "riscv_split_symbol (operands[2], operands[1], MAX_MACHINE_MODE, NULL)" + "riscv_split_symbol (operands[2], operands[1], MAX_MACHINE_MODE, NULL, TRUE)" [(set (match_dup 0) (match_dup 3))] { riscv_split_symbol (operands[2], operands[1], - MAX_MACHINE_MODE, &operands[3]); + MAX_MACHINE_MODE, &operands[3], TRUE); }) ;; 64-bit integer moves @@ -1765,15 +1771,20 @@ ;; Handle AND with 2^N-1 for N from 12 to XLEN. This can be split into ;; two logical shifts. Otherwise it requires 3 instructions: lui, ;; xor/addi/srli, and. + +;; Generating a temporary for the shift output gives better combiner results; +;; and also fixes a problem where op0 could be a paradoxical reg and shifting +;; by amounts larger than the size of the SUBREG_REG doesn't work. (define_split [(set (match_operand:GPR 0 "register_operand") (and:GPR (match_operand:GPR 1 "register_operand") - (match_operand:GPR 2 "p2m1_shift_operand")))] + (match_operand:GPR 2 "p2m1_shift_operand"))) + (clobber (match_operand:GPR 3 "register_operand"))] "" - [(set (match_dup 0) + [(set (match_dup 3) (ashift:GPR (match_dup 1) (match_dup 2))) (set (match_dup 0) - (lshiftrt:GPR (match_dup 0) (match_dup 2)))] + (lshiftrt:GPR (match_dup 3) (match_dup 2)))] { /* Op2 is a VOIDmode constant, so get the mode size from op1. */ operands[2] = GEN_INT (GET_MODE_BITSIZE (GET_MODE (operands[1])) @@ -1785,12 +1796,13 @@ (define_split [(set (match_operand:DI 0 "register_operand") (and:DI (match_operand:DI 1 "register_operand") - (match_operand:DI 2 "high_mask_shift_operand")))] + (match_operand:DI 2 "high_mask_shift_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] "TARGET_64BIT" - [(set (match_dup 0) + [(set (match_dup 3) (lshiftrt:DI (match_dup 1) (match_dup 2))) (set (match_dup 0) - (ashift:DI (match_dup 0) (match_dup 2)))] + (ashift:DI (match_dup 3) (match_dup 2)))] { operands[2] = GEN_INT (ctz_hwi (INTVAL (operands[2]))); }) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 4a1150e0994..654d756647c 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -80,9 +80,6 @@ UNSPEC_VUPKHPX UNSPEC_VUPKLPX UNSPEC_CONVERT_4F32_8I16 - UNSPEC_DARN - UNSPEC_DARN_32 - UNSPEC_DARN_RAW UNSPEC_DST UNSPEC_DSTT UNSPEC_DSTST @@ -161,9 +158,6 @@ UNSPEC_BCDADD UNSPEC_BCDSUB UNSPEC_BCD_OVERFLOW - UNSPEC_CMPRB - UNSPEC_CMPRB2 - UNSPEC_CMPEQB UNSPEC_VRLMI UNSPEC_VRLNM ]) @@ -566,7 +560,7 @@ [(set_attr "type" "vecsimple")]) ;; -(define_insn "altivec_vavgu<VI_char>" +(define_insn "uavg<mode>3_ceil" [(set (match_operand:VI 0 "register_operand" "=v") (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] @@ -575,7 +569,7 @@ "vavgu<VI_char> %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_vavgs<VI_char>" +(define_insn "avg<mode>3_ceil" [(set (match_operand:VI 0 "register_operand" "=v") (unspec:VI [(match_operand:VI 1 "register_operand" "v") (match_operand:VI 2 "register_operand" "v")] @@ -4101,223 +4095,6 @@ "bcd<bcd_add_sub>. %0,%1,%2,%3" [(set_attr "type" "vecsimple")]) -(define_insn "darn_32" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(const_int 0)] UNSPEC_DARN_32))] - "TARGET_P9_MISC" - "darn %0,0" - [(set_attr "type" "integer")]) - -(define_insn "darn_raw" - [(set (match_operand:DI 0 "register_operand" "=r") - (unspec:DI [(const_int 0)] UNSPEC_DARN_RAW))] - "TARGET_P9_MISC && TARGET_64BIT" - "darn %0,2" - [(set_attr "type" "integer")]) - -(define_insn "darn" - [(set (match_operand:DI 0 "register_operand" "=r") - (unspec:DI [(const_int 0)] UNSPEC_DARN))] - "TARGET_P9_MISC && TARGET_64BIT" - "darn %0,1" - [(set_attr "type" "integer")]) - -;; Test byte within range. -;; -;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx -;; represents a byte whose value is ignored in this context and -;; vv, the least significant byte, holds the byte value that is to -;; be tested for membership within the range specified by operand 2. -;; The bytes of operand 2 are organized as xx:xx:hi:lo. -;; -;; Return in target register operand 0 a value of 1 if lo <= vv and -;; vv <= hi. Otherwise, set register operand 0 to 0. -;; -;; Though the instructions to which this expansion maps operate on -;; 64-bit registers, the current implementation only operates on -;; SI-mode operands as the high-order bits provide no information -;; that is not already available in the low-order bits. To avoid the -;; costs of data widening operations, future enhancements might allow -;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. -(define_expand "cmprb" - [(set (match_dup 3) - (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "gpc_reg_operand" "r")] - UNSPEC_CMPRB)) - (set (match_operand:SI 0 "gpc_reg_operand" "=r") - (if_then_else:SI (lt (match_dup 3) - (const_int 0)) - (const_int -1) - (if_then_else (gt (match_dup 3) - (const_int 0)) - (const_int 1) - (const_int 0))))] - "TARGET_P9_MISC" -{ - operands[3] = gen_reg_rtx (CCmode); -}) - -;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx -;; represents a byte whose value is ignored in this context and -;; vv, the least significant byte, holds the byte value that is to -;; be tested for membership within the range specified by operand 2. -;; The bytes of operand 2 are organized as xx:xx:hi:lo. -;; -;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if -;; lo <= vv and vv <= hi. Otherwise, set the GT bit to 0. The other -;; 3 bits of the target CR register are all set to 0. -(define_insn "*cmprb_internal" - [(set (match_operand:CC 0 "cc_reg_operand" "=y") - (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "gpc_reg_operand" "r")] - UNSPEC_CMPRB))] - "TARGET_P9_MISC" - "cmprb %0,0,%1,%2" - [(set_attr "type" "logical")]) - -;; Set operand 0 register to -1 if the LT bit (0x8) of condition -;; register operand 1 is on. Otherwise, set operand 0 register to 1 -;; if the GT bit (0x4) of condition register operand 1 is on. -;; Otherwise, set operand 0 to 0. Note that the result stored into -;; register operand 0 is non-zero iff either the LT or GT bits are on -;; within condition register operand 1. -(define_insn "setb_signed" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y") - (const_int 0)) - (const_int -1) - (if_then_else (gt (match_dup 1) - (const_int 0)) - (const_int 1) - (const_int 0))))] - "TARGET_P9_MISC" - "setb %0,%1" - [(set_attr "type" "logical")]) - -(define_insn "setb_unsigned" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (if_then_else:SI (ltu (match_operand:CCUNS 1 "cc_reg_operand" "y") - (const_int 0)) - (const_int -1) - (if_then_else (gtu (match_dup 1) - (const_int 0)) - (const_int 1) - (const_int 0))))] - "TARGET_P9_MISC" - "setb %0,%1" - [(set_attr "type" "logical")]) - -;; Test byte within two ranges. -;; -;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx -;; represents a byte whose value is ignored in this context and -;; vv, the least significant byte, holds the byte value that is to -;; be tested for membership within the range specified by operand 2. -;; The bytes of operand 2 are organized as hi_1:lo_1:hi_2:lo_2. -;; -;; Return in target register operand 0 a value of 1 if (lo_1 <= vv and -;; vv <= hi_1) or if (lo_2 <= vv and vv <= hi_2). Otherwise, set register -;; operand 0 to 0. -;; -;; Though the instructions to which this expansion maps operate on -;; 64-bit registers, the current implementation only operates on -;; SI-mode operands as the high-order bits provide no information -;; that is not already available in the low-order bits. To avoid the -;; costs of data widening operations, future enhancements might allow -;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. -(define_expand "cmprb2" - [(set (match_dup 3) - (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "gpc_reg_operand" "r")] - UNSPEC_CMPRB2)) - (set (match_operand:SI 0 "gpc_reg_operand" "=r") - (if_then_else:SI (lt (match_dup 3) - (const_int 0)) - (const_int -1) - (if_then_else (gt (match_dup 3) - (const_int 0)) - (const_int 1) - (const_int 0))))] - "TARGET_P9_MISC" -{ - operands[3] = gen_reg_rtx (CCmode); -}) - -;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx -;; represents a byte whose value is ignored in this context and -;; vv, the least significant byte, holds the byte value that is to -;; be tested for membership within the ranges specified by operand 2. -;; The bytes of operand 2 are organized as hi_1:lo_1:hi_2:lo_2. -;; -;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if -;; (lo_1 <= vv and vv <= hi_1) or if (lo_2 <= vv and vv <= hi_2). -;; Otherwise, set the GT bit to 0. The other 3 bits of the target -;; CR register are all set to 0. -(define_insn "*cmprb2_internal" - [(set (match_operand:CC 0 "cc_reg_operand" "=y") - (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "gpc_reg_operand" "r")] - UNSPEC_CMPRB2))] - "TARGET_P9_MISC" - "cmprb %0,1,%1,%2" - [(set_attr "type" "logical")]) - -;; Test byte membership within set of 8 bytes. -;; -;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx -;; represents a byte whose value is ignored in this context and -;; vv, the least significant byte, holds the byte value that is to -;; be tested for membership within the set specified by operand 2. -;; The bytes of operand 2 are organized as e0:e1:e2:e3:e4:e5:e6:e7. -;; -;; Return in target register operand 0 a value of 1 if vv equals one -;; of the values e0, e1, e2, e3, e4, e5, e6, or e7. Otherwise, set -;; register operand 0 to 0. Note that the 8 byte values held within -;; operand 2 need not be unique. -;; -;; Though the instructions to which this expansion maps operate on -;; 64-bit registers, the current implementation requires that operands -;; 0 and 1 have mode SI as the high-order bits provide no information -;; that is not already available in the low-order bits. To avoid the -;; costs of data widening operations, future enhancements might allow -;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. -(define_expand "cmpeqb" - [(set (match_dup 3) - (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:DI 2 "gpc_reg_operand" "r")] - UNSPEC_CMPEQB)) - (set (match_operand:SI 0 "gpc_reg_operand" "=r") - (if_then_else:SI (lt (match_dup 3) - (const_int 0)) - (const_int -1) - (if_then_else (gt (match_dup 3) - (const_int 0)) - (const_int 1) - (const_int 0))))] - "TARGET_P9_MISC && TARGET_64BIT" -{ - operands[3] = gen_reg_rtx (CCmode); -}) - -;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx -;; represents a byte whose value is ignored in this context and -;; vv, the least significant byte, holds the byte value that is to -;; be tested for membership within the set specified by operand 2. -;; The bytes of operand 2 are organized as e0:e1:e2:e3:e4:e5:e6:e7. -;; -;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if vv -;; equals one of the values e0, e1, e2, e3, e4, e5, e6, or e7. Otherwise, -;; set the GT bit to zero. The other 3 bits of the target CR register -;; are all set to 0. -(define_insn "*cmpeqb_internal" - [(set (match_operand:CC 0 "cc_reg_operand" "=y") - (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:DI 2 "gpc_reg_operand" "r")] - UNSPEC_CMPEQB))] - "TARGET_P9_MISC && TARGET_64BIT" - "cmpeqb %0,%1,%2" - [(set_attr "type" "logical")]) - (define_expand "bcd<bcd_add_sub>_<code>" [(parallel [(set (reg:CCFP CR6_REGNO) (compare:CCFP diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h index 98ecf0ad2fe..e36de59ca25 100644 --- a/gcc/config/rs6000/darwin.h +++ b/gcc/config/rs6000/darwin.h @@ -75,9 +75,6 @@ } \ while (0) -/* Generate branch islands stubs if this is true. */ -extern int darwin_emit_branch_islands; - #define SUBTARGET_OVERRIDE_OPTIONS darwin_rs6000_override_options () #define C_COMMON_OVERRIDE_OPTIONS do { \ @@ -139,9 +136,30 @@ extern int darwin_emit_branch_islands; %:version-compare(>< 10.5 10.7 mmacosx-version-min= -lcrt1.10.5.o) \ %{fgnu-tm: -lcrttms.o}" -/* crt2.o is at least partially required for 10.3.x and earlier. */ +/* crt2.o is at least partially required for 10.3.x and earlier. + It deals with registration of the unwind frames, where this is not + automatically provided by the system. So we need it for any case that + might use exceptions. */ +#undef DARWIN_CRT2_SPEC #define DARWIN_CRT2_SPEC \ - "%{!m64:%:version-compare(!> 10.4 mmacosx-version-min= crt2.o%s)}" +"%{!m64:%{shared-libgcc|static-libstdc++|fexceptions|fobjc-exceptions|fgnu-runtime: \ + %:version-compare(!> 10.4 mmacosx-version-min= crt2.o%s) \ + }}" + +/* crt3 deals with providing cxa_atexit on earlier systems (or fixing it up, + for broken versions). It's only needed for c++ code, so we can make it + conditional on shared-libgcc since that's forced on for c++. */ +#undef DARWIN_CRT3_SPEC +#define DARWIN_CRT3_SPEC \ +"%{!m64:%{shared-libgcc|static-libstdc++: \ + %:version-compare(>< 10.4 10.5 mmacosx-version-min= crt3.o%s) \ + %:version-compare(!> 10.4 mmacosx-version-min= crt3_2.o%s) \ + }}" + +/* The PPC regs save/restore functions are leaves and could, conceivably + be used by the tm destructor. */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC TM_DESTRUCTOR " -lef_ppc" #undef SUBTARGET_EXTRA_SPECS #define SUBTARGET_EXTRA_SPECS \ @@ -291,9 +309,9 @@ extern int darwin_emit_branch_islands; /* This is supported in cctools 465 and later. The macro test above prevents using it in earlier build environments. */ #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \ - if ((LOG) != 0) \ + if ((LOG) > 0) \ { \ - if ((MAX_SKIP) == 0) \ + if ((MAX_SKIP) <= 0) \ fprintf ((FILE), "\t.p2align %d\n", (LOG)); \ else \ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \ @@ -468,6 +486,9 @@ do { \ this will need to be modified similar to the x86 case. */ #define TARGET_FOLD_BUILTIN SUBTARGET_FOLD_BUILTIN +/* First available SYMBOL flag bit for use by subtargets. */ +#define SYMBOL_FLAG_SUBT_DEP (SYMBOL_FLAG_MACH_DEP) + /* Use standard DWARF numbering for DWARF debugging information. */ #define RS6000_USE_DWARF_NUMBERING diff --git a/gcc/config/rs6000/darwin.md b/gcc/config/rs6000/darwin.md index 471058dd417..a1a46a9e7ea 100644 --- a/gcc/config/rs6000/darwin.md +++ b/gcc/config/rs6000/darwin.md @@ -122,33 +122,6 @@ You should have received a copy of the GNU General Public License [(set_attr "type" "store")]) ;; 64-bit MachO load/store support -(define_insn "movdi_low" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r,*!d") - (mem:DI (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b") - (match_operand 2 "" ""))))] - "TARGET_MACHO && TARGET_64BIT" - "@ - ld %0,lo16(%2)(%1) - lfd %0,lo16(%2)(%1)" - [(set_attr "type" "load")]) - -(define_insn "movsi_low_st" - [(set (mem:SI (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") - (match_operand 2 "" ""))) - (match_operand:SI 0 "gpc_reg_operand" "r"))] - "TARGET_MACHO && ! TARGET_64BIT" - "stw %0,lo16(%2)(%1)" - [(set_attr "type" "store")]) - -(define_insn "movdi_low_st" - [(set (mem:DI (lo_sum:DI (match_operand:DI 1 "gpc_reg_operand" "b,b") - (match_operand 2 "" ""))) - (match_operand:DI 0 "gpc_reg_operand" "r,*!d"))] - "TARGET_MACHO && TARGET_64BIT" - "@ - std %0,lo16(%2)(%1) - stfd %0,lo16(%2)(%1)" - [(set_attr "type" "store")]) ;; Mach-O PIC trickery. (define_expand "macho_high" diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 0a2bdb79e15..0feee7cafca 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1002,12 +1002,12 @@ BU_ALTIVEC_2 (VADDUWS, "vadduws", CONST, altivec_vadduws) BU_ALTIVEC_2 (VADDSWS, "vaddsws", CONST, altivec_vaddsws) BU_ALTIVEC_2 (VAND, "vand", CONST, andv4si3) BU_ALTIVEC_2 (VANDC, "vandc", CONST, andcv4si3) -BU_ALTIVEC_2 (VAVGUB, "vavgub", CONST, altivec_vavgub) -BU_ALTIVEC_2 (VAVGSB, "vavgsb", CONST, altivec_vavgsb) -BU_ALTIVEC_2 (VAVGUH, "vavguh", CONST, altivec_vavguh) -BU_ALTIVEC_2 (VAVGSH, "vavgsh", CONST, altivec_vavgsh) -BU_ALTIVEC_2 (VAVGUW, "vavguw", CONST, altivec_vavguw) -BU_ALTIVEC_2 (VAVGSW, "vavgsw", CONST, altivec_vavgsw) +BU_ALTIVEC_2 (VAVGUB, "vavgub", CONST, uavgv16qi3_ceil) +BU_ALTIVEC_2 (VAVGSB, "vavgsb", CONST, avgv16qi3_ceil) +BU_ALTIVEC_2 (VAVGUH, "vavguh", CONST, uavgv8hi3_ceil) +BU_ALTIVEC_2 (VAVGSH, "vavgsh", CONST, avgv8hi3_ceil) +BU_ALTIVEC_2 (VAVGUW, "vavguw", CONST, uavgv4si3_ceil) +BU_ALTIVEC_2 (VAVGSW, "vavgsw", CONST, avgv4si3_ceil) BU_ALTIVEC_2 (VCFUX, "vcfux", CONST, altivec_vcfux) BU_ALTIVEC_2 (VCFSX, "vcfsx", CONST, altivec_vcfsx) BU_ALTIVEC_2 (VCMPBFP, "vcmpbfp", CONST, altivec_vcmpbfp) @@ -1177,26 +1177,26 @@ BU_ALTIVEC_X (MTVSCR, "mtvscr", MISC) BU_ALTIVEC_X (MFVSCR, "mfvscr", MISC) BU_ALTIVEC_X (DSSALL, "dssall", MISC) BU_ALTIVEC_X (DSS, "dss", MISC) -BU_ALTIVEC_X (LVSL, "lvsl", MEM) -BU_ALTIVEC_X (LVSR, "lvsr", MEM) -BU_ALTIVEC_X (LVEBX, "lvebx", MEM) -BU_ALTIVEC_X (LVEHX, "lvehx", MEM) -BU_ALTIVEC_X (LVEWX, "lvewx", MEM) -BU_ALTIVEC_X (LVXL, "lvxl", MEM) -BU_ALTIVEC_X (LVXL_V2DF, "lvxl_v2df", MEM) -BU_ALTIVEC_X (LVXL_V2DI, "lvxl_v2di", MEM) -BU_ALTIVEC_X (LVXL_V4SF, "lvxl_v4sf", MEM) -BU_ALTIVEC_X (LVXL_V4SI, "lvxl_v4si", MEM) -BU_ALTIVEC_X (LVXL_V8HI, "lvxl_v8hi", MEM) -BU_ALTIVEC_X (LVXL_V16QI, "lvxl_v16qi", MEM) -BU_ALTIVEC_X (LVX, "lvx", MEM) -BU_ALTIVEC_X (LVX_V1TI, "lvx_v1ti", MEM) -BU_ALTIVEC_X (LVX_V2DF, "lvx_v2df", MEM) -BU_ALTIVEC_X (LVX_V2DI, "lvx_v2di", MEM) -BU_ALTIVEC_X (LVX_V4SF, "lvx_v4sf", MEM) -BU_ALTIVEC_X (LVX_V4SI, "lvx_v4si", MEM) -BU_ALTIVEC_X (LVX_V8HI, "lvx_v8hi", MEM) -BU_ALTIVEC_X (LVX_V16QI, "lvx_v16qi", MEM) +BU_ALTIVEC_X (LVSL, "lvsl", PURE) +BU_ALTIVEC_X (LVSR, "lvsr", PURE) +BU_ALTIVEC_X (LVEBX, "lvebx", PURE) +BU_ALTIVEC_X (LVEHX, "lvehx", PURE) +BU_ALTIVEC_X (LVEWX, "lvewx", PURE) +BU_ALTIVEC_X (LVXL, "lvxl", PURE) +BU_ALTIVEC_X (LVXL_V2DF, "lvxl_v2df", PURE) +BU_ALTIVEC_X (LVXL_V2DI, "lvxl_v2di", PURE) +BU_ALTIVEC_X (LVXL_V4SF, "lvxl_v4sf", PURE) +BU_ALTIVEC_X (LVXL_V4SI, "lvxl_v4si", PURE) +BU_ALTIVEC_X (LVXL_V8HI, "lvxl_v8hi", PURE) +BU_ALTIVEC_X (LVXL_V16QI, "lvxl_v16qi", PURE) +BU_ALTIVEC_X (LVX, "lvx", PURE) +BU_ALTIVEC_X (LVX_V1TI, "lvx_v1ti", PURE) +BU_ALTIVEC_X (LVX_V2DF, "lvx_v2df", PURE) +BU_ALTIVEC_X (LVX_V2DI, "lvx_v2di", PURE) +BU_ALTIVEC_X (LVX_V4SF, "lvx_v4sf", PURE) +BU_ALTIVEC_X (LVX_V4SI, "lvx_v4si", PURE) +BU_ALTIVEC_X (LVX_V8HI, "lvx_v8hi", PURE) +BU_ALTIVEC_X (LVX_V16QI, "lvx_v16qi", PURE) BU_ALTIVEC_X (STVX, "stvx", MEM) BU_ALTIVEC_X (STVX_V2DF, "stvx_v2df", MEM) BU_ALTIVEC_X (STVX_V2DI, "stvx_v2di", MEM) @@ -1204,10 +1204,10 @@ BU_ALTIVEC_X (STVX_V4SF, "stvx_v4sf", MEM) BU_ALTIVEC_X (STVX_V4SI, "stvx_v4si", MEM) BU_ALTIVEC_X (STVX_V8HI, "stvx_v8hi", MEM) BU_ALTIVEC_X (STVX_V16QI, "stvx_v16qi", MEM) -BU_ALTIVEC_C (LVLX, "lvlx", MEM) -BU_ALTIVEC_C (LVLXL, "lvlxl", MEM) -BU_ALTIVEC_C (LVRX, "lvrx", MEM) -BU_ALTIVEC_C (LVRXL, "lvrxl", MEM) +BU_ALTIVEC_C (LVLX, "lvlx", PURE) +BU_ALTIVEC_C (LVLXL, "lvlxl", PURE) +BU_ALTIVEC_C (LVRX, "lvrx", PURE) +BU_ALTIVEC_C (LVRXL, "lvrxl", PURE) BU_ALTIVEC_X (STVEBX, "stvebx", MEM) BU_ALTIVEC_X (STVEHX, "stvehx", MEM) BU_ALTIVEC_X (STVEWX, "stvewx", MEM) @@ -1718,15 +1718,15 @@ BU_VSX_P (XVCMPGEDP_P, "xvcmpgedp_p", CONST, vector_ge_v2df_p) BU_VSX_P (XVCMPGTDP_P, "xvcmpgtdp_p", CONST, vector_gt_v2df_p) /* VSX builtins that are handled as special cases. */ -BU_VSX_X (LXSDX, "lxsdx", MEM) -BU_VSX_X (LXVD2X_V1TI, "lxvd2x_v1ti", MEM) -BU_VSX_X (LXVD2X_V2DF, "lxvd2x_v2df", MEM) -BU_VSX_X (LXVD2X_V2DI, "lxvd2x_v2di", MEM) -BU_VSX_X (LXVDSX, "lxvdsx", MEM) -BU_VSX_X (LXVW4X_V4SF, "lxvw4x_v4sf", MEM) -BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4si", MEM) -BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", MEM) -BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", MEM) +BU_VSX_X (LXSDX, "lxsdx", PURE) +BU_VSX_X (LXVD2X_V1TI, "lxvd2x_v1ti", PURE) +BU_VSX_X (LXVD2X_V2DF, "lxvd2x_v2df", PURE) +BU_VSX_X (LXVD2X_V2DI, "lxvd2x_v2di", PURE) +BU_VSX_X (LXVDSX, "lxvdsx", PURE) +BU_VSX_X (LXVW4X_V4SF, "lxvw4x_v4sf", PURE) +BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4si", PURE) +BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", PURE) +BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", PURE) BU_VSX_X (STXSDX, "stxsdx", MEM) BU_VSX_X (STXVD2X_V1TI, "stxvd2x_v1ti", MEM) BU_VSX_X (STXVD2X_V2DF, "stxvd2x_v2df", MEM) @@ -1735,13 +1735,13 @@ BU_VSX_X (STXVW4X_V4SF, "stxvw4x_v4sf", MEM) BU_VSX_X (STXVW4X_V4SI, "stxvw4x_v4si", MEM) BU_VSX_X (STXVW4X_V8HI, "stxvw4x_v8hi", MEM) BU_VSX_X (STXVW4X_V16QI, "stxvw4x_v16qi", MEM) -BU_VSX_X (LD_ELEMREV_V1TI, "ld_elemrev_v1ti", MEM) -BU_VSX_X (LD_ELEMREV_V2DF, "ld_elemrev_v2df", MEM) -BU_VSX_X (LD_ELEMREV_V2DI, "ld_elemrev_v2di", MEM) -BU_VSX_X (LD_ELEMREV_V4SF, "ld_elemrev_v4sf", MEM) -BU_VSX_X (LD_ELEMREV_V4SI, "ld_elemrev_v4si", MEM) -BU_VSX_X (LD_ELEMREV_V8HI, "ld_elemrev_v8hi", MEM) -BU_VSX_X (LD_ELEMREV_V16QI, "ld_elemrev_v16qi", MEM) +BU_VSX_X (LD_ELEMREV_V1TI, "ld_elemrev_v1ti", PURE) +BU_VSX_X (LD_ELEMREV_V2DF, "ld_elemrev_v2df", PURE) +BU_VSX_X (LD_ELEMREV_V2DI, "ld_elemrev_v2di", PURE) +BU_VSX_X (LD_ELEMREV_V4SF, "ld_elemrev_v4sf", PURE) +BU_VSX_X (LD_ELEMREV_V4SI, "ld_elemrev_v4si", PURE) +BU_VSX_X (LD_ELEMREV_V8HI, "ld_elemrev_v8hi", PURE) +BU_VSX_X (LD_ELEMREV_V16QI, "ld_elemrev_v16qi", PURE) BU_VSX_X (ST_ELEMREV_V1TI, "st_elemrev_v1ti", MEM) BU_VSX_X (ST_ELEMREV_V2DF, "st_elemrev_v2df", MEM) BU_VSX_X (ST_ELEMREV_V2DI, "st_elemrev_v2di", MEM) diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index c3b98315503..d30e5dec980 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -791,6 +791,11 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_REDUC_PLUS: case UNSPEC_REDUC: return 1; + case UNSPEC_VPMSUM: + /* vpmsumd is not swappable, but vpmsum[bhw] are. */ + if (GET_MODE (op) == V2DImode) + return 0; + break; } } diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6b5f1cd06f7..760967c06d0 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -7671,6 +7671,104 @@ address_offset (rtx op) return NULL_RTX; } +/* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for + the mode. If we can't find (or don't know) the alignment of the symbol + we assume (optimistically) that it's sufficiently aligned [??? maybe we + should be pessimistic]. Offsets are validated in the same way as for + reg + offset. */ +static bool +darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode) +{ + /* We should not get here with this. */ + gcc_checking_assert (! mode_supports_dq_form (mode)); + + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET) + x = XVECEXP (x, 0, 0); + + rtx sym = NULL_RTX; + unsigned HOST_WIDE_INT offset = 0; + + if (GET_CODE (x) == PLUS) + { + sym = XEXP (x, 0); + if (! SYMBOL_REF_P (sym)) + return false; + if (!CONST_INT_P (XEXP (x, 1))) + return false; + offset = INTVAL (XEXP (x, 1)); + } + else if (SYMBOL_REF_P (x)) + sym = x; + else if (CONST_INT_P (x)) + offset = INTVAL (x); + else if (GET_CODE (x) == LABEL_REF) + offset = 0; // We assume code labels are Pmode aligned + else + return false; // not sure what we have here. + + /* If we don't know the alignment of the thing to which the symbol refers, + we assume optimistically it is "enough". + ??? maybe we should be pessimistic instead. */ + unsigned align = 0; + + if (sym) + { + tree decl = SYMBOL_REF_DECL (sym); +#if TARGET_MACHO + if (MACHO_SYMBOL_INDIRECTION_P (sym)) + /* The decl in an indirection symbol is the original one, which might + be less aligned than the indirection. Our indirections are always + pointer-aligned. */ + ; + else +#endif + if (decl && DECL_ALIGN (decl)) + align = DECL_ALIGN_UNIT (decl); + } + + unsigned int extra = 0; + switch (mode) + { + case E_DFmode: + case E_DDmode: + case E_DImode: + /* If we are using VSX scalar loads, restrict ourselves to reg+reg + addressing. */ + if (VECTOR_MEM_VSX_P (mode)) + return false; + + if (!TARGET_POWERPC64) + extra = 4; + else if ((offset & 3) || (align & 3)) + return false; + break; + + case E_TFmode: + case E_IFmode: + case E_KFmode: + case E_TDmode: + case E_TImode: + case E_PTImode: + extra = 8; + if (!TARGET_POWERPC64) + extra = 12; + else if ((offset & 3) || (align & 3)) + return false; + break; + + default: + break; + } + + /* We only care if the access(es) would cause a change to the high part. */ + offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; + return IN_RANGE (offset, -(HOST_WIDE_INT_1 << 15), + (HOST_WIDE_INT_1 << 15) - 1 - extra); +} + /* Return true if the MEM operand is a memory operand suitable for use with a (full width, possibly multiple) gpr load/store. On powerpc64 this means the offset must be divisible by 4. @@ -7705,7 +7803,13 @@ mem_operand_gpr (rtx op, machine_mode mode) && legitimate_indirect_address_p (XEXP (addr, 0), false)) return true; - /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */ + /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is + really OK. Doing this early avoids teaching all the other machinery + about them. */ + if (TARGET_MACHO && GET_CODE (addr) == LO_SUM) + return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode); + + /* Only allow offsettable addresses. See PRs 83969 and 84279. */ if (!rs6000_offsettable_memref_p (op, mode, false)) return false; @@ -21330,7 +21434,7 @@ print_operand (FILE *file, rtx x, int code) { const char *name = XSTR (x, 0); #if TARGET_MACHO - if (darwin_emit_branch_islands + if (darwin_symbol_stubs && MACHOPIC_INDIRECT && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) name = machopic_indirection_name (x, /*stub_p=*/true); @@ -25729,10 +25833,14 @@ rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) stack_limit_rtx, GEN_INT (size))); - emit_insn (gen_elf_high (tmp_reg, toload)); - emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload)); - emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, - const0_rtx)); + /* We cannot use r0 with elf_low. Lamely solve this problem by + moving registers around. */ + rtx r11_reg = gen_rtx_REG (Pmode, 11); + emit_move_insn (tmp_reg, r11_reg); + emit_insn (gen_elf_high (r11_reg, toload)); + emit_insn (gen_elf_low (r11_reg, r11_reg, toload)); + emit_insn (gen_cond_trap (LTU, stack_reg, r11_reg, const0_rtx)); + emit_move_insn (r11_reg, tmp_reg); } else warning (0, "stack limit expression is not supported"); @@ -37779,25 +37887,31 @@ rs6000_can_inline_p (tree caller, tree callee) tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); - /* If callee has no option attributes, then it is ok to inline. */ + /* If the callee has no option attributes, then it is ok to inline. */ if (!callee_tree) ret = true; - /* If caller has no option attributes, but callee does then it is not ok to - inline. */ - else if (!caller_tree) - ret = false; - else { - struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); + HOST_WIDE_INT caller_isa; struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); + HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags; + HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit; - /* Callee's options should a subset of the caller's, i.e. a vsx function - can inline an altivec function but a non-vsx function can't inline a - vsx function. */ - if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags) - == callee_opts->x_rs6000_isa_flags) + /* If the caller has option attributes, then use them. + Otherwise, use the command line options. */ + if (caller_tree) + caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags; + else + caller_isa = rs6000_isa_flags; + + /* The callee's options must be a subset of the caller's options, i.e. + a vsx function may inline an altivec function, but a no-vsx function + must not inline a vsx function. However, for those options that the + callee has explicitly enabled or disabled, then we must enforce that + the callee's and caller's options match exactly; see PR70010. */ + if (((caller_isa & callee_isa) == callee_isa) + && (caller_isa & explicit_isa) == (callee_isa & explicit_isa)) ret = true; } @@ -38290,7 +38404,8 @@ rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg, if ((cookie_val & CALL_LONG) != 0 && GET_CODE (func_desc) == SYMBOL_REF) { - if (darwin_emit_branch_islands && TARGET_32BIT) + /* FIXME: the longcall opt should not hang off picsymbol stubs. */ + if (darwin_symbol_stubs && TARGET_32BIT) make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */ else { diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index ad80592765d..7bfa5eea2ee 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -137,6 +137,9 @@ UNSPEC_LSQ UNSPEC_FUSION_GPR UNSPEC_STACK_CHECK + UNSPEC_CMPRB + UNSPEC_CMPRB2 + UNSPEC_CMPEQB UNSPEC_ADD_ROUND_TO_ODD UNSPEC_SUB_ROUND_TO_ODD UNSPEC_MUL_ROUND_TO_ODD @@ -164,6 +167,9 @@ UNSPECV_EH_RR ; eh_reg_restore UNSPECV_ISYNC ; isync instruction UNSPECV_MFTB ; move from time base + UNSPECV_DARN ; darn 1 (deliver a random number) + UNSPECV_DARN_32 ; darn 2 + UNSPECV_DARN_RAW ; darn 0 UNSPECV_NLGR ; non-local goto receiver UNSPECV_MFFS ; Move from FPSCR UNSPECV_MFFSL ; Move from FPSCR light instruction version @@ -5930,7 +5936,7 @@ /* Insert new RN mode into FSCPR. */ emit_insn (gen_rs6000_mffs (tmp_df)); tmp_di = simplify_gen_subreg (DImode, tmp_df, DFmode, 0); - emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFF8FFFFFFFF))); + emit_insn (gen_anddi3 (tmp_di, tmp_di, GEN_INT (0xFFFFFFF8FFFFFFFFULL))); emit_insn (gen_iordi3 (tmp_di, tmp_di, tmp_rn)); /* Need to write to field 7. The fields are [0:15]. The equation to @@ -6818,13 +6824,6 @@ ;; do the load 16-bits at a time. We could do this by loading from memory, ;; and this is even supposed to be faster, but it is simpler not to get ;; integers in the TOC. -(define_insn "movsi_low" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r") - (mem:SI (lo_sum:SI (match_operand:SI 1 "gpc_reg_operand" "b") - (match_operand 2 "" ""))))] - "TARGET_MACHO && ! TARGET_64BIT" - "lwz %0,lo16(%2)(%1)" - [(set_attr "type" "load")]) ;; MR LA LWZ LFIWZX LXSIWZX ;; STW STFIWX STXSIWX LI LIS @@ -14322,7 +14321,225 @@ "xscmpuqp %0,%1,%2" [(set_attr "type" "veccmp") (set_attr "size" "128")]) + +;; Miscellaneous ISA 3.0 (power9) instructions + +(define_insn "darn_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_DARN_32))] + "TARGET_P9_MISC" + "darn %0,0" + [(set_attr "type" "integer")]) +(define_insn "darn_raw" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_DARN_RAW))] + "TARGET_P9_MISC && TARGET_64BIT" + "darn %0,2" + [(set_attr "type" "integer")]) + +(define_insn "darn" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_DARN))] + "TARGET_P9_MISC && TARGET_64BIT" + "darn %0,1" + [(set_attr "type" "integer")]) + +;; Test byte within range. +;; +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the range specified by operand 2. +;; The bytes of operand 2 are organized as xx:xx:hi:lo. +;; +;; Return in target register operand 0 a value of 1 if lo <= vv and +;; vv <= hi. Otherwise, set register operand 0 to 0. +;; +;; Though the instructions to which this expansion maps operate on +;; 64-bit registers, the current implementation only operates on +;; SI-mode operands as the high-order bits provide no information +;; that is not already available in the low-order bits. To avoid the +;; costs of data widening operations, future enhancements might allow +;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. +(define_expand "cmprb" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_dup 3) + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the range specified by operand 2. +;; The bytes of operand 2 are organized as xx:xx:hi:lo. +;; +;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if +;; lo <= vv and vv <= hi. Otherwise, set the GT bit to 0. The other +;; 3 bits of the target CR register are all set to 0. +(define_insn "*cmprb_internal" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB))] + "TARGET_P9_MISC" + "cmprb %0,0,%1,%2" + [(set_attr "type" "logical")]) + +;; Set operand 0 register to -1 if the LT bit (0x8) of condition +;; register operand 1 is on. Otherwise, set operand 0 register to 1 +;; if the GT bit (0x4) of condition register operand 1 is on. +;; Otherwise, set operand 0 to 0. Note that the result stored into +;; register operand 0 is non-zero iff either the LT or GT bits are on +;; within condition register operand 1. +(define_insn "setb_signed" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y") + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 1) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" + "setb %0,%1" + [(set_attr "type" "logical")]) + +(define_insn "setb_unsigned" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (ltu (match_operand:CCUNS 1 "cc_reg_operand" "y") + (const_int 0)) + (const_int -1) + (if_then_else (gtu (match_dup 1) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" + "setb %0,%1" + [(set_attr "type" "logical")]) + +;; Test byte within two ranges. +;; +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the range specified by operand 2. +;; The bytes of operand 2 are organized as hi_1:lo_1:hi_2:lo_2. +;; +;; Return in target register operand 0 a value of 1 if (lo_1 <= vv and +;; vv <= hi_1) or if (lo_2 <= vv and vv <= hi_2). Otherwise, set register +;; operand 0 to 0. +;; +;; Though the instructions to which this expansion maps operate on +;; 64-bit registers, the current implementation only operates on +;; SI-mode operands as the high-order bits provide no information +;; that is not already available in the low-order bits. To avoid the +;; costs of data widening operations, future enhancements might allow +;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. +(define_expand "cmprb2" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB2)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_dup 3) + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the ranges specified by operand 2. +;; The bytes of operand 2 are organized as hi_1:lo_1:hi_2:lo_2. +;; +;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if +;; (lo_1 <= vv and vv <= hi_1) or if (lo_2 <= vv and vv <= hi_2). +;; Otherwise, set the GT bit to 0. The other 3 bits of the target +;; CR register are all set to 0. +(define_insn "*cmprb2_internal" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:SI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPRB2))] + "TARGET_P9_MISC" + "cmprb %0,1,%1,%2" + [(set_attr "type" "logical")]) + +;; Test byte membership within set of 8 bytes. +;; +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the set specified by operand 2. +;; The bytes of operand 2 are organized as e0:e1:e2:e3:e4:e5:e6:e7. +;; +;; Return in target register operand 0 a value of 1 if vv equals one +;; of the values e0, e1, e2, e3, e4, e5, e6, or e7. Otherwise, set +;; register operand 0 to 0. Note that the 8 byte values held within +;; operand 2 need not be unique. +;; +;; Though the instructions to which this expansion maps operate on +;; 64-bit registers, the current implementation requires that operands +;; 0 and 1 have mode SI as the high-order bits provide no information +;; that is not already available in the low-order bits. To avoid the +;; costs of data widening operations, future enhancements might allow +;; DI mode for operand 0 and/or might allow operand 1 to be QI mode. +(define_expand "cmpeqb" + [(set (match_dup 3) + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPEQB)) + (set (match_operand:SI 0 "gpc_reg_operand" "=r") + (if_then_else:SI (lt (match_dup 3) + (const_int 0)) + (const_int -1) + (if_then_else (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] + "TARGET_P9_MISC && TARGET_64BIT" +{ + operands[3] = gen_reg_rtx (CCmode); +}) + +;; The bytes of operand 1 are organized as xx:xx:xx:vv, where xx +;; represents a byte whose value is ignored in this context and +;; vv, the least significant byte, holds the byte value that is to +;; be tested for membership within the set specified by operand 2. +;; The bytes of operand 2 are organized as e0:e1:e2:e3:e4:e5:e6:e7. +;; +;; Set bit 1 (the GT bit, 0x4) of CR register operand 0 to 1 if vv +;; equals one of the values e0, e1, e2, e3, e4, e5, e6, or e7. Otherwise, +;; set the GT bit to zero. The other 3 bits of the target CR register +;; are all set to 0. +(define_insn "*cmpeqb_internal" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec:CC [(match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "gpc_reg_operand" "r")] + UNSPEC_CMPEQB))] + "TARGET_P9_MISC && TARGET_64BIT" + "cmpeqb %0,%1,%2" + [(set_attr "type" "logical")]) (include "sync.md") diff --git a/gcc/config/s390/8561.md b/gcc/config/s390/8561.md index e5a345f4dba..2442349271b 100644 --- a/gcc/config/s390/8561.md +++ b/gcc/config/s390/8561.md @@ -1,4 +1,4 @@ -;; Scheduling description for arch13. +;; Scheduling description for z15. ;; Copyright (C) 2019 Free Software Foundation, Inc. ;; Contributed by Robin Dapp (rdapp@linux.ibm.com) ;; This file is part of GCC. @@ -17,12 +17,12 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. -(define_attr "arch13_unit_fpd" "" +(define_attr "z15_unit_fpd" "" (cond [(eq_attr "mnemonic" "ddb,ddbr,deb,debr,dxbr,sqdb,sqdbr,sqeb,\ sqebr,sqxbr,vfddb,vfdsb,vfsqdb,vfsqsb,wfddb,wfdsb,wfdxb,wfsqdb,wfsqxb") (const_int 1)] (const_int 0))) -(define_attr "arch13_unit_fxa" "" +(define_attr "z15_unit_fxa" "" (cond [(eq_attr "mnemonic" "a,afi,ag,agf,agfi,agfr,agh,aghi,aghik,\ agr,agrk,ah,ahi,ahik,ahy,al,alc,alcg,alcgr,alcr,alfi,alg,algf,algfi,algfr,\ alghsik,algr,algrk,alhsik,alr,alrk,aly,ar,ark,ay,bras,brasl,etnd,exrl,flogr,\ @@ -39,7 +39,7 @@ slgrk,sll,sllg,sllk,slr,slrk,sly,sr,sra,srag,srak,srk,srl,srlg,srlk,sy,x,xg,\ xgr,xgrk,xihf,xilf,xr,xrk,xy") (const_int 1)] (const_int 0))) -(define_attr "arch13_unit_fxb" "" +(define_attr "z15_unit_fxb" "" (cond [(eq_attr "mnemonic" "agsi,algsi,alsi,asi,b,bc,bcr,bi,br,brcl,\ c,cfi,cg,cgf,cgfi,cgfr,cgfrl,cgh,cghi,cghrl,cghsi,cgit,cgr,cgrl,cgrt,ch,\ chi,chrl,chsi,chy,cit,cl,clfhsi,clfi,clfit,clg,clgf,clgfi,clgfr,clgfrl,\ @@ -52,11 +52,11 @@ tmhl,tml,tmlh,tmll,tmy,vlgvb,vlgvf,vlgvg,vlgvh,vlr,vlvgb,vlvgf,vlvgg,vlvgh,\ vlvgp,vst,vstef,vsteg,vstl,vstrl,vstrlr,xi,xiy") (const_int 1)] (const_int 0))) -(define_attr "arch13_unit_fxd" "" +(define_attr "z15_unit_fxd" "" (cond [(eq_attr "mnemonic" "dlgr,dlr,dr,dsgfr,dsgr") (const_int 1)] (const_int 0))) -(define_attr "arch13_unit_lsu" "" +(define_attr "z15_unit_lsu" "" (cond [(eq_attr "mnemonic" "a,adb,aeb,ag,agf,agh,agsi,ah,ahy,al,alc,\ alcg,alg,algf,algsi,alsi,aly,asi,ay,c,cdb,ceb,cg,cgf,cgfrl,cgh,cghrl,cghsi,\ cgrl,ch,chrl,chsi,chy,cl,clc,clfhsi,clg,clgf,clgfrl,clghrl,clghsi,clgrl,\ @@ -73,7 +73,7 @@ vllezf,vllezg,vllezh,vllezlf,vlrepb,vlrepf,vlrepg,vlreph,vlrl,vlrlr,vst,\ vstef,vsteg,vstl,vstrl,vstrlr,x,xg,xi,xiy,xy") (const_int 1)] (const_int 0))) -(define_attr "arch13_unit_vfu" "" +(define_attr "z15_unit_vfu" "" (cond [(eq_attr "mnemonic" "adb,adbr,adtr,aeb,aebr,axbr,axtr,cdb,\ cdbr,cdtr,ceb,cebr,cpsdr,cxbr,cxtr,ddtr,dxtr,fidbr,fidbra,fidtr,fiebr,\ fiebra,fixbr,fixbra,fixtr,lcdbr,lcebr,lcxbr,ldeb,ldebr,ldetr,le,ledbr,ledtr,\ @@ -115,7 +115,7 @@ wflpxb,wfmadb,wfmasb,wfmaxb,wfmaxxb,wfmdb,wfminxb,wfmsb,wfmsdb,wfmssb,wfmsxb,\ wfmxb,wfnmaxb,wfnmsxb,wfsdb,wfssb,wfsxb,wldeb,wledb") (const_int 1)] (const_int 0))) -(define_attr "arch13_cracked" "" +(define_attr "z15_cracked" "" (cond [(eq_attr "mnemonic" "bas,basr,cdfbr,cdftr,cdgbr,cdgtr,cdlfbr,\ cdlftr,cdlgbr,cdlgtr,cefbr,cegbr,celfbr,celgbr,cfdbr,cfebr,cfxbr,cgdbr,cgdtr,\ cgebr,cgxbr,cgxtr,chhsi,clfdbr,clfdtr,clfebr,clfxbr,clfxtr,clgdbr,clgdtr,\ @@ -123,13 +123,13 @@ clgebr,clgxbr,clgxtr,cs,csg,csy,d,efpc,ex,lcgfr,lngfr,lpgfr,lpq,lxr,lzxr,\ rxsbg,stpq,vgef,vgeg,vscef,vsceg,vsteb,vsteh") (const_int 1)] (const_int 0))) -(define_attr "arch13_expanded" "" +(define_attr "z15_expanded" "" (cond [(eq_attr "mnemonic" "cds,cdsg,cdsy,cxfbr,cxftr,cxgbr,cxgtr,\ cxlfbr,cxlftr,cxlgbr,cxlgtr,dl,dlg,dsg,dsgf,lam,lm,lmg,lmy,sldl,srda,srdl,\ stam,stm,stmg,stmy,tbegin,tbeginc") (const_int 1)] (const_int 0))) -(define_attr "arch13_groupalone" "" +(define_attr "z15_groupalone" "" (cond [(eq_attr "mnemonic" "alc,alcg,alcgr,alcr,axbr,axtr,clc,cxbr,\ cxtr,dlgr,dlr,dr,dsgfr,dsgr,dxbr,dxtr,fixbr,fixbra,fixtr,flogr,lcxbr,lnxbr,\ lpxbr,ltxbr,ltxtr,lxdb,lxdbr,lxdtr,lxeb,lxebr,m,madb,maeb,maebr,mfy,mg,mgrk,\ @@ -137,11 +137,11 @@ ml,mlg,mlgr,mlr,mr,msdb,mseb,msebr,mvc,mxbr,mxtr,nc,oc,ppa,sfpc,slb,slbg,\ slbgr,slbr,sqxbr,sxbr,sxtr,tabort,tcxb,tdcxt,tend,xc") (const_int 1)] (const_int 0))) -(define_attr "arch13_endgroup" "" +(define_attr "z15_endgroup" "" (cond [(eq_attr "mnemonic" "bras,brasl,exrl,ipm") (const_int 1)] (const_int 0))) -(define_attr "arch13_groupoftwo" "" +(define_attr "z15_groupoftwo" "" (cond [(eq_attr "mnemonic" "vacccq,vacq,vfmadb,vfmasb,vfmsdb,vfmssb,\ vfnmadb,vfnmasb,vfnmsdb,vfnmssb,vgfmab,vgfmaf,vgfmag,vgfmah,vmaeb,vmaef,vmaeh,\ vmahb,vmahf,vmahh,vmalb,vmaleb,vmalef,vmaleh,vmalf,vmalhb,vmalhf,vmalhh,\ @@ -149,8 +149,8 @@ vmalhw,vmalob,vmalof,vmaloh,vmaob,vmaof,vmaoh,vmslg,vperm,vsbcbiq,vsbiq,vsel,\ wfmadb,wfmasb,wfmaxb,wfmsdb,wfmssb,wfmsxb,wfnmaxb,wfnmsxb") (const_int 1)] (const_int 0))) -(define_insn_reservation "arch13_0" 0 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_0" 0 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "a,afi,ag,agfi,aghi,aghik,agr,agrk,ahi,ahik,al,\ alfi,alg,algf,algfi,algfr,alghsik,algr,algrk,alhsik,alr,alrk,aly,ar,ark,ay,\ b,bc,bcr,bi,br,bras,brasl,brcl,c,cfi,cg,cgfi,cghi,cghsi,cgit,cgr,cgrl,\ @@ -168,8 +168,8 @@ sllk,slr,slrk,sly,sr,sra,srag,srak,srda,srdl,srk,srl,srlg,srlk,sy,tm,tmh,\ tmhh,tmhl,tml,tmlh,tmll,tmy,vlr,vlvgb,vlvgf,vlvgg,vlvgh,x,xg,xgr,xgrk,xihf,\ xilf,xr,xrk,xy")) "nothing") -(define_insn_reservation "arch13_1" 1 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_1" 1 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "agf,agfr,agh,agsi,ah,ahy,algsi,alsi,asi,cgf,\ cgfr,cgfrl,cgh,cghrl,ch,chrl,chy,clm,clmy,cpsdr,laa,laag,lan,lang,lao,laog,\ lax,laxg,le,ler,ley,loc,locg,locghi,locgr,lochi,locr,mvghi,mvhhi,mvhi,mvi,\ @@ -196,8 +196,8 @@ wfcedb,wfcesb,wfcexb,wfchdb,wfchedb,wfchesb,wfchexb,wfchsb,wfchxb,wflcdb,\ wflcsb,wflcxb,wflndb,wflnsb,wflnxb,wflpdb,wflpsb,wflpxb,wfmaxxb,wfminxb,xi,\ xiy")) "nothing") -(define_insn_reservation "arch13_2" 2 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_2" 2 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "cdb,cdbr,ceb,cebr,ear,ipm,l,lcbb,lcdbr,lcebr,ld,\ lde,ldy,lg,lgdr,lgrl,llc,llgc,llgf,llgfrl,llgh,llghrl,llgt,llh,llhrl,lm,\ lmg,lmy,lndbr,lnebr,lpdbr,lpebr,lrl,ltdbr,ltebr,ly,popcnt,sar,tcdb,tceb,\ @@ -208,8 +208,8 @@ vistrh,vlgvb,vlgvf,vlgvg,vlgvh,vllezb,vllezf,vllezg,vllezh,vllezlf,vlrepb,\ vlrepf,vlrepg,vlreph,vlrl,vlvgp,vpklsfs,vpklsgs,vpklshs,vpksfs,vpksgs,vpkshs,\ wfcdb,wfcexbs,wfchexbs,wfchxbs,wfcsb")) "nothing") -(define_insn_reservation "arch13_3" 3 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_3" 3 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "cds,cdsy,mgh,mghi,mh,mhi,mhy,std,stdy,ste,stey,\ vcksm,vfeezbs,vfeezfs,vfeezhs,vgfmab,vgfmaf,vgfmag,vgfmah,vgfmb,vgfmf,vgfmg,\ vgfmh,vistrbs,vistrfs,vistrhs,vl,vlbb,vll,vlrlr,vmaeb,vmaef,vmaeh,vmahb,\ @@ -218,14 +218,14 @@ vmalob,vmalof,vmaloh,vmaob,vmaof,vmaoh,vmeb,vmef,vmeh,vmhb,vmhf,vmhh,vmlb,\ vmleb,vmlef,vmleh,vmlf,vmlhb,vmlhf,vmlhh,vmlhw,vmlob,vmlof,vmloh,vmob,vmof,\ vmoh,vsumb,vsumgf,vsumgh,vsumh,vsumqf,vsumqg,vtm")) "nothing") -(define_insn_reservation "arch13_4" 4 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_4" 4 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "bas,basr,chhsi,clc,ex,lam,lcgfr,lngfr,lpgfr,lxr,\ lzxr,ms,msfi,msgf,msgfi,msgfr,msr,msy,mvc,nc,oc,ppa,rxsbg,tabort,tbegin,\ tbeginc,tend,vst,vstef,vsteg,vstl,vstrl,vstrlr,xc")) "nothing") -(define_insn_reservation "arch13_5" 5 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_5" 5 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "adb,adbr,aeb,aebr,alc,alcg,alcgr,alcr,cs,csg,\ csy,fidbr,fidbra,fiebr,fiebra,ldeb,ldebr,ledbr,madbr,mdb,mdbr,meeb,meebr,\ msdbr,msrkc,sdb,sdbr,seb,sebr,slb,slbg,slbgr,slbr,stm,stmg,stmy,vfadb,vfasb,\ @@ -233,53 +233,53 @@ vfidb,vfisb,vfmadb,vfmasb,vfmdb,vfmsb,vfmsdb,vfmssb,vfnmadb,vfnmasb,vfnmsdb,\ vfnmssb,vfsdb,vfssb,vldeb,vledb,vmslg,wfadb,wfasb,wfidb,wfisb,wflld,wfmadb,\ wfmasb,wfmdb,wfmsb,wfmsdb,wfmssb,wfsdb,wfssb,wldeb,wledb")) "nothing") -(define_insn_reservation "arch13_6" 6 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_6" 6 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "msg,msgr,sfpc")) "nothing") -(define_insn_reservation "arch13_7" 7 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_7" 7 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "adtr,cdtr,fidtr,ldetr,ltdtr,msgrkc,sdtr,tdcdt,\ tdcet,vgef,vgeg")) "nothing") -(define_insn_reservation "arch13_8" 8 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_8" 8 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "cdsg,flogr,lpq,stpq,vsteb,vsteh")) "nothing") -(define_insn_reservation "arch13_9" 9 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_9" 9 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "cdfbr,cdgbr,cdlfbr,cdlgbr,cefbr,cegbr,celfbr,\ celgbr,cxfbr,cxgbr,cxlfbr,cxlgbr,m,madb,maeb,maebr,mfy,ml,mlr,mr,msdb,mseb,\ msebr,stam,wfaxb,wfixb,wfsxb")) "nothing") -(define_insn_reservation "arch13_10" 10 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_10" 10 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "lxdb,lxdbr,lxeb,lxebr,vscef,vsceg")) "nothing") -(define_insn_reservation "arch13_11" 11 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_11" 11 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "cfdbr,cfebr,cgdbr,cgebr,clfdbr,clfebr,clgdbr,\ clgebr,mg,mgrk,mlg,mlgr")) "nothing") -(define_insn_reservation "arch13_12" 12 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_12" 12 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "cxbr,cxftr,cxlftr,cxtr,tcxb,tdcxt")) "nothing") -(define_insn_reservation "arch13_13" 13 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_13" 13 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "axbr,axtr,fixbr,fixbra,fixtr,lcxbr,lnxbr,lpxbr,\ ltxbr,ltxtr,lxdtr,sxbr,sxtr")) "nothing") -(define_insn_reservation "arch13_14" 14 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_14" 14 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "cfxbr,cgxbr,clfxbr,clgxbr,ledtr")) "nothing") -(define_insn_reservation "arch13_16" 16 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_16" 16 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "cdftr,cdlftr")) "nothing") -(define_insn_reservation "arch13_20" 20 - (and (eq_attr "cpu" "arch13") +(define_insn_reservation "z15_20" 20 + (and (eq_attr "cpu" "z15") (eq_attr "mnemonic" "cdgtr,cdlgtr,cgdtr,cgxtr,clfdtr,clfxtr,clgdtr,\ clgxtr,cxgtr,cxlgtr,d,ddb,ddbr,ddtr,deb,debr,dl,dlg,dlgr,dlr,dr,dsg,dsgf,\ dsgfr,dsgr,dxbr,dxtr,efpc,mdtr,mxbr,mxtr,sqdb,sqdbr,sqeb,sqebr,sqxbr,vfddb,\ diff --git a/gcc/config/s390/driver-native.c b/gcc/config/s390/driver-native.c index a386d633a87..6bc7d590668 100644 --- a/gcc/config/s390/driver-native.c +++ b/gcc/config/s390/driver-native.c @@ -121,10 +121,10 @@ s390_host_detect_local_cpu (int argc, const char **argv) break; case 0x8561: case 0x8562: - cpu = "arch13"; + cpu = "z15"; break; default: - cpu = "arch13"; + cpu = "z15"; break; } } diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def index fbf7d9f50e8..3f39b9d3b88 100644 --- a/gcc/config/s390/s390-builtins.def +++ b/gcc/config/s390/s390-builtins.def @@ -281,7 +281,7 @@ #define B_HTM (1 << 1) /* Builtins requiring the transactional execution facility. */ #define B_VX (1 << 2) /* Builtins requiring the z13 vector extensions. */ #define B_VXE (1 << 3) /* Builtins requiring the z14 vector extensions. */ -#define B_VXE2 (1 << 4) /* Builtins requiring the arch13 vector extensions. */ +#define B_VXE2 (1 << 4) /* Builtins requiring the z15 vector extensions. */ #define B_DEP (1 << 5) /* Builtin has been deprecated and a warning should be issued. */ /* B_DEF defines a standard (not overloaded) builtin diff --git a/gcc/config/s390/s390-c.c b/gcc/config/s390/s390-c.c index 97debdc3905..c2f9b507011 100644 --- a/gcc/config/s390/s390-c.c +++ b/gcc/config/s390/s390-c.c @@ -905,6 +905,12 @@ s390_resolve_overloaded_builtin (location_t loc, return error_mark_node; } + if (!TARGET_VXE2 && (ob_flags & B_VXE2)) + { + error_at (loc, "%qF requires z15 or higher", ob_fndecl); + return error_mark_node; + } + ob_fcode -= S390_BUILTIN_MAX; for (b_arg_chain = TYPE_ARG_TYPES (TREE_TYPE (ob_fndecl)); @@ -983,6 +989,15 @@ s390_resolve_overloaded_builtin (location_t loc, return error_mark_node; } + + if (!TARGET_VXE2 + && bflags_overloaded_builtin_var[last_match_index] & B_VXE2) + { + error_at (loc, "%qs matching variant requires z15 or higher", + IDENTIFIER_POINTER (DECL_NAME (ob_fndecl))); + return error_mark_node; + } + if (bflags_overloaded_builtin_var[last_match_index] & B_DEP) warning_at (loc, 0, "%qs matching variant is deprecated.", IDENTIFIER_POINTER (DECL_NAME (ob_fndecl))); diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h index ab41cb883f3..502edea719b 100644 --- a/gcc/config/s390/s390-opts.h +++ b/gcc/config/s390/s390-opts.h @@ -37,7 +37,7 @@ enum processor_type PROCESSOR_2827_ZEC12, PROCESSOR_2964_Z13, PROCESSOR_3906_Z14, - PROCESSOR_8561_ARCH13, + PROCESSOR_8561_Z15, PROCESSOR_NATIVE, PROCESSOR_max }; diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index fc4571d0d0c..db3f94978ec 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -337,7 +337,7 @@ const struct s390_processor processor_table[] = { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost, 10 }, { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 }, { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 }, - { "arch13", "", PROCESSOR_8561_ARCH13, &zEC12_cost, 13 }, + { "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 }, { "native", "", PROCESSOR_NATIVE, NULL, 0 } }; @@ -811,6 +811,12 @@ s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, error ("Builtin %qF requires z14 or higher.", fndecl); return const0_rtx; } + + if ((bflags & B_VXE2) && !TARGET_VXE2) + { + error ("Builtin %qF requires z15 or higher.", fndecl); + return const0_rtx; + } } if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET && fcode < S390_ALL_BUILTIN_MAX) @@ -1782,7 +1788,7 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, if (*code == EQ) new_code = reversed_comparison_code_parts (GET_CODE (*op0), XEXP (*op0, 0), - XEXP (*op1, 0), NULL); + XEXP (*op0, 1), NULL); else new_code = GET_CODE (*op0); @@ -1795,7 +1801,7 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, } /* ~a==b -> ~(a^b)==0 ~a!=b -> ~(a^b)!=0 */ - if (TARGET_ARCH13 + if (TARGET_Z15 && (*code == EQ || *code == NE) && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode) && GET_CODE (*op0) == NOT) @@ -1807,7 +1813,7 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1, } /* a&b == -1 -> ~a|~b == 0 a|b == -1 -> ~a&~b == 0 */ - if (TARGET_ARCH13 + if (TARGET_Z15 && (*code == EQ || *code == NE) && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR) && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode) @@ -3529,7 +3535,7 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code, /* It is a real IF-THEN-ELSE. An additional move will be needed to implement that. */ - if (!TARGET_ARCH13 + if (!TARGET_Z15 && reload_completed && !rtx_equal_p (dst, then) && !rtx_equal_p (dst, els)) @@ -3551,7 +3557,7 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code, case IOR: /* nnrk, nngrk */ - if (TARGET_ARCH13 + if (TARGET_Z15 && (mode == SImode || mode == DImode) && GET_CODE (XEXP (x, 0)) == NOT && GET_CODE (XEXP (x, 1)) == NOT) @@ -3598,7 +3604,7 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code, case AND: /* nork, nogrk */ - if (TARGET_ARCH13 + if (TARGET_Z15 && (mode == SImode || mode == DImode) && GET_CODE (XEXP (x, 0)) == NOT && GET_CODE (XEXP (x, 1)) == NOT) @@ -3770,7 +3776,7 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code, *total = COSTS_N_INSNS (1); /* nxrk, nxgrk ~(a^b)==0 */ - if (TARGET_ARCH13 + if (TARGET_Z15 && GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const0_rtx && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR @@ -3785,7 +3791,7 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code, } /* nnrk, nngrk, nork, nogrk */ - if (TARGET_ARCH13 + if (TARGET_Z15 && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR) && XEXP (x, 1) == const0_rtx && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode) @@ -14440,16 +14446,16 @@ s390_get_sched_attrmask (rtx_insn *insn) if (get_attr_z14_groupoftwo (insn)) mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; break; - case PROCESSOR_8561_ARCH13: - if (get_attr_arch13_cracked (insn)) + case PROCESSOR_8561_Z15: + if (get_attr_z15_cracked (insn)) mask |= S390_SCHED_ATTR_MASK_CRACKED; - if (get_attr_arch13_expanded (insn)) + if (get_attr_z15_expanded (insn)) mask |= S390_SCHED_ATTR_MASK_EXPANDED; - if (get_attr_arch13_endgroup (insn)) + if (get_attr_z15_endgroup (insn)) mask |= S390_SCHED_ATTR_MASK_ENDGROUP; - if (get_attr_arch13_groupalone (insn)) + if (get_attr_z15_groupalone (insn)) mask |= S390_SCHED_ATTR_MASK_GROUPALONE; - if (get_attr_arch13_groupoftwo (insn)) + if (get_attr_z15_groupoftwo (insn)) mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO; break; default: @@ -14487,15 +14493,15 @@ s390_get_unit_mask (rtx_insn *insn, int *units) if (get_attr_z14_unit_vfu (insn)) mask |= 1 << 3; break; - case PROCESSOR_8561_ARCH13: + case PROCESSOR_8561_Z15: *units = 4; - if (get_attr_arch13_unit_lsu (insn)) + if (get_attr_z15_unit_lsu (insn)) mask |= 1 << 0; - if (get_attr_arch13_unit_fxa (insn)) + if (get_attr_z15_unit_fxa (insn)) mask |= 1 << 1; - if (get_attr_arch13_unit_fxb (insn)) + if (get_attr_z15_unit_fxb (insn)) mask |= 1 << 2; - if (get_attr_arch13_unit_vfu (insn)) + if (get_attr_z15_unit_vfu (insn)) mask |= 1 << 3; break; default: @@ -14511,7 +14517,7 @@ s390_is_fpd (rtx_insn *insn) return false; return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn) - || get_attr_arch13_unit_fpd (insn); + || get_attr_z15_unit_fpd (insn); } static bool @@ -14521,7 +14527,7 @@ s390_is_fxd (rtx_insn *insn) return false; return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn) - || get_attr_arch13_unit_fxd (insn); + || get_attr_z15_unit_fxd (insn); } /* Returns TRUE if INSN is a long-running instruction. */ @@ -15968,13 +15974,19 @@ s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED, static HOST_WIDE_INT s390_vector_alignment (const_tree type) { + tree size = TYPE_SIZE (type); + if (!TARGET_VX_ABI) return default_vector_alignment (type); if (TYPE_USER_ALIGN (type)) return TYPE_ALIGN (type); - return MIN (64, tree_to_shwi (TYPE_SIZE (type))); + if (tree_fits_uhwi_p (size) + && tree_to_uhwi (size) < BIGGEST_ALIGNMENT) + return tree_to_uhwi (size); + + return BIGGEST_ALIGNMENT; } /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 969f58a2ba0..f7023d985f1 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -41,12 +41,12 @@ enum processor_flags PF_Z14 = 2048, PF_VXE = 4096, PF_VXE2 = 8192, - PF_ARCH13 = 16384 + PF_Z15 = 16384 }; /* This is necessary to avoid a warning about comparing different enum types. */ -#define s390_tune_attr ((enum attr_cpu)(s390_tune > PROCESSOR_8561_ARCH13 ? PROCESSOR_8561_ARCH13 : s390_tune )) +#define s390_tune_attr ((enum attr_cpu)(s390_tune > PROCESSOR_8561_Z15 ? PROCESSOR_8561_Z15 : s390_tune )) /* These flags indicate that the generated code should run on a cpu providing the respective hardware facility regardless of the @@ -100,10 +100,10 @@ enum processor_flags (s390_arch_flags & PF_VXE) #define TARGET_CPU_VXE_P(opts) \ (opts->x_s390_arch_flags & PF_VXE) -#define TARGET_CPU_ARCH13 \ - (s390_arch_flags & PF_ARCH13) -#define TARGET_CPU_ARCH13_P(opts) \ - (opts->x_s390_arch_flags & PF_ARCH13) +#define TARGET_CPU_Z15 \ + (s390_arch_flags & PF_Z15) +#define TARGET_CPU_Z15_P(opts) \ + (opts->x_s390_arch_flags & PF_Z15) #define TARGET_CPU_VXE2 \ (s390_arch_flags & PF_VXE2) #define TARGET_CPU_VXE2_P(opts) \ @@ -160,9 +160,9 @@ enum processor_flags (TARGET_VX && TARGET_CPU_VXE) #define TARGET_VXE_P(opts) \ (TARGET_VX_P (opts) && TARGET_CPU_VXE_P (opts)) -#define TARGET_ARCH13 (TARGET_ZARCH && TARGET_CPU_ARCH13) -#define TARGET_ARCH13_P(opts) \ - (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_ARCH13_P (opts)) +#define TARGET_Z15 (TARGET_ZARCH && TARGET_CPU_Z15) +#define TARGET_Z15_P(opts) \ + (TARGET_ZARCH_P (opts->x_target_flags) && TARGET_CPU_Z15_P (opts)) #define TARGET_VXE2 \ (TARGET_VX && TARGET_CPU_VXE2) #define TARGET_VXE2_P(opts) \ diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 714d8b00a80..5a3496ac92e 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -513,11 +513,11 @@ ;; Processor type. This attribute must exactly match the processor_type ;; enumeration in s390.h. -(define_attr "cpu" "z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,arch13" +(define_attr "cpu" "z900,z990,z9_109,z9_ec,z10,z196,zEC12,z13,z14,z15" (const (symbol_ref "s390_tune_attr"))) (define_attr "cpu_facility" - "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,arch13,vxe2" + "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12,vx,z13,z14,vxe,z15,vxe2" (const_string "standard")) (define_attr "enabled" "" @@ -575,8 +575,8 @@ (match_test "TARGET_VXE")) (const_int 1) - (and (eq_attr "cpu_facility" "arch13") - (match_test "TARGET_ARCH13")) + (and (eq_attr "cpu_facility" "z15") + (match_test "TARGET_Z15")) (const_int 1) (and (eq_attr "cpu_facility" "vxe2") @@ -613,7 +613,7 @@ ;; Pipeline description for z14 (include "3906.md") -;; Pipeline description for arch13 +;; Pipeline description for z15 (include "8561.md") ;; Predicates @@ -642,7 +642,7 @@ (define_mode_iterator DD_DF [DF DD]) (define_mode_iterator TD_TF [TF TD]) -; 32 bit int<->fp conversion instructions are available since VXE2 (arch13). +; 32 bit int<->fp conversion instructions are available since VXE2 (z15). (define_mode_iterator VX_CONV_BFP [DF (SF "TARGET_VXE2")]) (define_mode_iterator VX_CONV_INT [DI (SI "TARGET_VXE2")]) @@ -6749,7 +6749,7 @@ stoc<g>%C1\t%3,%0 stoc<g>%D1\t%4,%0" [(set_attr "op_type" "RRF,RRF,RRF,RSY,RSY,RIE,RIE,RSY,RSY") - (set_attr "cpu_facility" "*,*,arch13,*,*,z13,z13,*,*")]) + (set_attr "cpu_facility" "*,*,z15,*,*,z13,z13,*,*")]) ;; ;;- Multiply instructions. @@ -7568,7 +7568,7 @@ (and:GPR (not:GPR (match_operand:GPR 1 "nonimmediate_operand" "")) (match_operand:GPR 2 "general_operand" ""))) (clobber (reg:CC CC_REGNUM))] - "!TARGET_ARCH13 + "!TARGET_Z15 && ! reload_completed && (GET_CODE (operands[0]) != MEM /* Ensure that s390_logical_operator_ok_p will succeed even @@ -7925,7 +7925,7 @@ (set (match_operand:GPR 0 "register_operand" "=d") (ANDOR:GPR (not:GPR (match_dup 1)) (match_dup 2)))] - "TARGET_ARCH13 && s390_match_ccmode(insn, CCTmode)" + "TARGET_Z15 && s390_match_ccmode(insn, CCTmode)" "<ANDOR:noxa>c<GPR:g>rk\t%0,%2,%1" [(set_attr "op_type" "RRF")]) @@ -7937,7 +7937,7 @@ (match_operand:GPR 2 "register_operand" "d")) (const_int 0))) (clobber (match_scratch:GPR 0 "=d"))] - "TARGET_ARCH13 && s390_match_ccmode(insn, CCTmode)" + "TARGET_Z15 && s390_match_ccmode(insn, CCTmode)" "<ANDOR:noxa>c<GPR:g>rk\t%0,%2,%1" [(set_attr "op_type" "RRF")]) @@ -7947,7 +7947,7 @@ (ANDOR:GPR (not:GPR (match_operand:GPR 1 "register_operand" "d")) (match_operand:GPR 2 "register_operand" "d"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARCH13" + "TARGET_Z15" "<ANDOR:noxa>c<GPR:g>rk\t%0,%2,%1" [(set_attr "op_type" "RRF")]) @@ -7965,7 +7965,7 @@ (set (match_operand:GPR 0 "register_operand" "=d") (ANDOR:GPR (not:GPR (match_dup 1)) (not:GPR (match_dup 2))))] - "TARGET_ARCH13 && s390_match_ccmode(insn, CCTmode)" + "TARGET_Z15 && s390_match_ccmode(insn, CCTmode)" "n<ANDOR:inv_no><GPR:g>rk\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -7977,7 +7977,7 @@ (not:GPR (match_operand:GPR 2 "register_operand" "d"))) (const_int 0))) (clobber (match_scratch:GPR 0 "=d"))] - "TARGET_ARCH13 && s390_match_ccmode(insn, CCTmode)" + "TARGET_Z15 && s390_match_ccmode(insn, CCTmode)" "n<ANDOR:inv_no><GPR:g>rk\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -7987,7 +7987,7 @@ (ANDOR:GPR (not:GPR (match_operand:GPR 1 "register_operand" "d")) (not:GPR (match_operand:GPR 2 "register_operand" "d")))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARCH13" + "TARGET_Z15" "n<ANDOR:inv_no><GPR:g>rk\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -8371,7 +8371,7 @@ (set (match_operand:GPR 0 "register_operand" "=d") (xor:GPR (not:GPR (match_dup 1)) (match_dup 2)))] - "TARGET_ARCH13 && s390_match_ccmode(insn, CCTmode)" + "TARGET_Z15 && s390_match_ccmode(insn, CCTmode)" "nx<GPR:g>rk\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -8383,7 +8383,7 @@ (match_operand:GPR 2 "register_operand" "d"))) (const_int 0))) (clobber (match_scratch:GPR 0 "=d"))] - "TARGET_ARCH13 && s390_match_ccmode(insn, CCTmode)" + "TARGET_Z15 && s390_match_ccmode(insn, CCTmode)" "nx<GPR:g>rk\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -8393,7 +8393,7 @@ (not:GPR (xor:GPR (match_operand:GPR 1 "register_operand" "d") (match_operand:GPR 2 "register_operand" "d")))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARCH13" + "TARGET_Z15" "nx<GPR:g>rk\t%0,%1,%2" [(set_attr "op_type" "RRF")]) @@ -11351,34 +11351,34 @@ ; Population count instruction ; -(define_insn "*popcountdi_arch13_cc" +(define_insn "*popcountdi_z15_cc" [(set (reg CC_REGNUM) (compare (popcount:DI (match_operand:DI 1 "register_operand" "d")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=d") (match_dup 1))] - "TARGET_ARCH13 && s390_match_ccmode (insn, CCTmode)" + "TARGET_Z15 && s390_match_ccmode (insn, CCTmode)" "popcnt\t%0,%1,8" [(set_attr "op_type" "RRF")]) -(define_insn "*popcountdi_arch13_cconly" +(define_insn "*popcountdi_z15_cconly" [(set (reg CC_REGNUM) (compare (popcount:DI (match_operand:DI 1 "register_operand" "d")) (const_int 0))) (clobber (match_scratch:DI 0 "=d"))] - "TARGET_ARCH13 && s390_match_ccmode(insn, CCTmode)" + "TARGET_Z15 && s390_match_ccmode(insn, CCTmode)" "popcnt\t%0,%1,8" [(set_attr "op_type" "RRF")]) -(define_insn "*popcountdi_arch13" +(define_insn "*popcountdi_z15" [(set (match_operand:DI 0 "register_operand" "=d") (popcount:DI (match_operand:DI 1 "register_operand" "d"))) (clobber (reg:CC CC_REGNUM))] - "TARGET_ARCH13" + "TARGET_Z15" "popcnt\t%0,%1,8" [(set_attr "op_type" "RRF")]) -; The pre-arch13 popcount instruction counts the bits of op1 in 8 byte +; The pre-z15 popcount instruction counts the bits of op1 in 8 byte ; portions and stores the result in the corresponding bytes in op0. (define_insn "*popcount<mode>_z196" [(set (match_operand:INT 0 "register_operand" "=d") @@ -11422,7 +11422,7 @@ (clobber (reg:CC CC_REGNUM))])] "TARGET_Z196" { - if (!TARGET_ARCH13) + if (!TARGET_Z15) { emit_insn (gen_popcountdi2_z196 (operands[0], operands[1])); DONE; @@ -11453,7 +11453,7 @@ ; popcount always counts on the full 64 bit. With the z196 version ; counting bits per byte we just ignore the upper 4 bytes. With the -; arch13 version we have to zero out the upper 32 bits first. +; z15 version we have to zero out the upper 32 bits first. (define_expand "popcountsi2" [(set (match_dup 2) (zero_extend:DI (match_operand:SI 1 "register_operand"))) @@ -11463,7 +11463,7 @@ (subreg:SI (match_dup 3) 4))] "TARGET_Z196" { - if (!TARGET_ARCH13) + if (!TARGET_Z15) { emit_insn (gen_popcountsi2_z196 (operands[0], operands[1])); DONE; @@ -11501,7 +11501,7 @@ (subreg:HI (match_dup 3) 6))] "TARGET_Z196" { - if (!TARGET_ARCH13) + if (!TARGET_Z15) { emit_insn (gen_popcounthi2_z196 (operands[0], operands[1])); DONE; @@ -11516,7 +11516,7 @@ ; For popcount on a single byte the old z196 style popcount ; instruction is ideal. Since it anyway does a byte-wise popcount we ; just use it instead of zero extending the QImode input to DImode and -; using the arch13 popcount variant. +; using the z15 popcount variant. (define_expand "popcountqi2" [; popcnt op0, op1 (parallel [(set (match_operand:QI 0 "register_operand" "") diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt index 639f1679a56..6a6e1f75736 100644 --- a/gcc/config/s390/s390.opt +++ b/gcc/config/s390/s390.opt @@ -110,7 +110,10 @@ EnumValue Enum(processor_type) String(arch12) Value(PROCESSOR_3906_Z14) EnumValue -Enum(processor_type) String(arch13) Value(PROCESSOR_8561_ARCH13) +Enum(processor_type) String(z15) Value(PROCESSOR_8561_Z15) + +EnumValue +Enum(processor_type) String(arch13) Value(PROCESSOR_8561_Z15) EnumValue Enum(processor_type) String(native) Value(PROCESSOR_NATIVE) DriverOnly diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 140ef474a92..21cc76534b1 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -70,7 +70,7 @@ (define_mode_iterator V_128_NOSINGLE [V16QI V8HI V4SI V4SF V2DI V2DF]) -; 32 bit int<->fp vector conversion instructions are available since VXE2 (arch13). +; 32 bit int<->fp vector conversion instructions are available since VXE2 (z15). (define_mode_iterator VX_VEC_CONV_BFP [V2DF (V4SF "TARGET_VXE2")]) (define_mode_iterator VX_VEC_CONV_INT [V2DI (V4SI "TARGET_VXE2")]) diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md index 3020bc94d3e..5ec3fb4fe58 100644 --- a/gcc/config/s390/vx-builtins.md +++ b/gcc/config/s390/vx-builtins.md @@ -2147,7 +2147,7 @@ "<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3" [(set_attr "op_type" "VRR")]) -; The element reversal builtins introduced with arch13 have been made +; The element reversal builtins introduced with z15 have been made ; available also for older CPUs down to z13. (define_expand "eltswap<mode>" [(set (match_operand:VEC_HW 0 "nonimmediate_operand" "") @@ -2181,8 +2181,8 @@ vster<bhfgq>\t%v1,%v0" [(set_attr "op_type" "*,VRX,VRX")]) -; arch13 has instructions for doing element reversal from mem to reg -; or the other way around. For reg to reg or on pre arch13 machines +; z15 has instructions for doing element reversal from mem to reg +; or the other way around. For reg to reg or on pre z15 machines ; we have to emulate it with vector permute. (define_insn_and_split "*eltswap<mode>_emu" [(set (match_operand:VEC_HW 0 "nonimmediate_operand" "=vR") diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 8dc79a764df..11440988f1a 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -664,6 +664,9 @@ static const struct attribute_spec sh_attribute_table[] = #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings +#undef TARGET_HAVE_SPECULATION_SAFE_VALUE +#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed + struct gcc_target targetm = TARGET_INITIALIZER; @@ -737,7 +740,7 @@ got_mode_name:; { if (tokens[i] == "strict") ret.strict = true; - else if (tokens[i].find ("gbr-offset=") == 0) + else if (!tokens[i].compare (0, strlen ("gbr-offset="), "gbr-offset=")) { std::string offset_str = tokens[i].substr (strlen ("gbr-offset=")); ret.tcb_gbr_offset = integral_argument (offset_str.c_str ()); @@ -958,11 +961,13 @@ sh_option_override (void) if (flag_unsafe_math_optimizations) { /* Enable fsca insn for SH4A if not otherwise specified by the user. */ - if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP) + if (global_options_set.x_TARGET_FSCA == 0 + && (TARGET_SH4A_FP || TARGET_FPU_SH4_300)) TARGET_FSCA = 1; /* Enable fsrra insn for SH4A if not otherwise specified by the user. */ - if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP) + if (global_options_set.x_TARGET_FSRRA == 0 + && (TARGET_SH4A_FP || TARGET_FPU_SH4_300)) TARGET_FSRRA = 1; } @@ -12087,9 +12092,11 @@ sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const rtx r = gen_reg_rtx (SImode); rtx_insn* i0; if (from_mode == QImode) - i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn); + i0 = sh_check_add_incdec_notes ( + emit_insn_after (gen_extendqisi2 (r, set_src), insn)); else if (from_mode == HImode) - i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn); + i0 = sh_check_add_incdec_notes ( + emit_insn_after (gen_extendhisi2 (r, set_src), insn)); else gcc_unreachable (); @@ -12507,7 +12514,7 @@ static void sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode, int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED) { - if ((TARGET_SH4A_FP || TARGET_SH4_300) + if ((TARGET_SH4A_FP || TARGET_FPU_SH4_300) && prev_mode != FP_MODE_NONE && prev_mode != mode) { emit_insn (gen_toggle_pr ()); diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index 0204872eae7..9372fcfa699 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -69,6 +69,8 @@ extern int code_for_indirect_jump_scratch; FPU is disabled (which makes it compatible with SH4al-dsp). */ #define TARGET_SH4A_FP (TARGET_SH4A && TARGET_FPU_ANY) +/* True if the FPU is a SH4-300 variant. */ +#define TARGET_FPU_SH4_300 (TARGET_FPU_ANY && TARGET_SH4_300) /* This is not used by the SH2E calling convention */ #define TARGET_VARARGS_PRETEND_ARGS(FUN_DECL) \ diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 83543778619..fdb80d5d9d6 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -9163,7 +9163,7 @@ (xor:SI (reg:SI FPSCR_REG) (const_int FPSCR_PR))) (set (reg:SI FPSCR_MODES_REG) (unspec_volatile:SI [(const_int 0)] UNSPECV_FPSCR_MODES))] - "TARGET_SH4A_FP" + "TARGET_SH4A_FP || TARGET_FPU_SH4_300" "fpchg" [(set_attr "type" "fpscr_toggle")]) @@ -9391,15 +9391,31 @@ (define_expand "negsf2" [(set (match_operand:SF 0 "fp_arith_reg_operand") (neg:SF (match_operand:SF 1 "fp_arith_reg_operand")))] - "TARGET_SH2E") + "TARGET_FPU_ANY" +{ + if (TARGET_FPU_SH4_300) + emit_insn (gen_negsf2_fpscr (operands[0], operands[1])); + else + emit_insn (gen_negsf2_no_fpscr (operands[0], operands[1])); + DONE; +}) -(define_insn "*negsf2_i" +(define_insn "negsf2_no_fpscr" [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")))] - "TARGET_SH2E" + "TARGET_FPU_ANY && !TARGET_FPU_SH4_300" "fneg %0" [(set_attr "type" "fmove")]) +(define_insn "negsf2_fpscr" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "0"))) + (use (reg:SI FPSCR_MODES_REG))] + "TARGET_FPU_SH4_300" + "fneg %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "single")]) + (define_expand "sqrtsf2" [(set (match_operand:SF 0 "fp_arith_reg_operand" "") (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))] @@ -9489,15 +9505,31 @@ (define_expand "abssf2" [(set (match_operand:SF 0 "fp_arith_reg_operand") (abs:SF (match_operand:SF 1 "fp_arith_reg_operand")))] - "TARGET_SH2E") + "TARGET_FPU_ANY" +{ + if (TARGET_FPU_SH4_300) + emit_insn (gen_abssf2_fpscr (operands[0], operands[1])); + else + emit_insn (gen_abssf2_no_fpscr (operands[0], operands[1])); + DONE; +}) -(define_insn "*abssf2_i" +(define_insn "abssf2_no_fpscr" [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")))] - "TARGET_SH2E" + "TARGET_FPU_ANY && !TARGET_FPU_SH4_300" "fabs %0" [(set_attr "type" "fmove")]) +(define_insn "abssf2_fpscr" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "0"))) + (use (reg:SI FPSCR_MODES_REG))] + "TARGET_FPU_SH4_300" + "fabs %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "single")]) + (define_expand "adddf3" [(set (match_operand:DF 0 "fp_arith_reg_operand" "") (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "") @@ -9673,12 +9705,28 @@ (define_expand "negdf2" [(set (match_operand:DF 0 "fp_arith_reg_operand") (neg:DF (match_operand:DF 1 "fp_arith_reg_operand")))] - "TARGET_FPU_DOUBLE") + "TARGET_FPU_DOUBLE" +{ + if (TARGET_FPU_SH4_300) + emit_insn (gen_negdf2_fpscr (operands[0], operands[1])); + else + emit_insn (gen_negdf2_no_fpscr (operands[0], operands[1])); + DONE; +}) -(define_insn "*negdf2_i" +(define_insn "negdf2_fpscr" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (neg:DF (match_operand:DF 1 "fp_arith_reg_operand" "0"))) + (use (reg:SI FPSCR_MODES_REG))] + "TARGET_FPU_SH4_300" + "fneg %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "double")]) + +(define_insn "negdf2_no_fpscr" [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") (neg:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")))] - "TARGET_FPU_DOUBLE" + "TARGET_FPU_DOUBLE && !TARGET_FPU_SH4_300" "fneg %0" [(set_attr "type" "fmove")]) @@ -9704,15 +9752,31 @@ (define_expand "absdf2" [(set (match_operand:DF 0 "fp_arith_reg_operand") (abs:DF (match_operand:DF 1 "fp_arith_reg_operand")))] - "TARGET_FPU_DOUBLE") + "TARGET_FPU_DOUBLE" +{ + if (TARGET_FPU_SH4_300) + emit_insn (gen_absdf2_fpscr (operands[0], operands[1])); + else + emit_insn (gen_absdf2_no_fpscr (operands[0], operands[1])); + DONE; +}) -(define_insn "*absdf2_i" +(define_insn "absdf2_no_fpscr" [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") (abs:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")))] - "TARGET_FPU_DOUBLE" + "TARGET_FPU_DOUBLE && !TARGET_FPU_SH4_300" "fabs %0" [(set_attr "type" "fmove")]) +(define_insn "absdf2_fpscr" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (abs:DF (match_operand:DF 1 "fp_arith_reg_operand" "0"))) + (use (reg:SI FPSCR_MODES_REG))] + "TARGET_FPU_SH4_300" + "fabs %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "double")]) + (define_expand "extendsfdf2" [(set (match_operand:DF 0 "fp_arith_reg_operand" "") (float_extend:DF (match_operand:SF 1 "fpul_operand" "")))] diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h index 9bdae7b9faa..ef1adb69ede 100644 --- a/gcc/config/sparc/sparc-protos.h +++ b/gcc/config/sparc/sparc-protos.h @@ -69,6 +69,7 @@ extern void sparc_split_reg_mem (rtx, rtx, machine_mode); extern void sparc_split_mem_reg (rtx, rtx, machine_mode); extern int sparc_split_reg_reg_legitimate (rtx, rtx); extern void sparc_split_reg_reg (rtx, rtx, machine_mode); +extern const char *output_load_pcrel_sym (rtx *); extern const char *output_ubranch (rtx, rtx_insn *); extern const char *output_cbranch (rtx, rtx, int, int, int, rtx_insn *); extern const char *output_return (rtx_insn *); diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 93479ab6bdc..ca783a68e2d 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -4243,9 +4243,11 @@ sparc_cannot_force_const_mem (machine_mode mode, rtx x) } /* Global Offset Table support. */ -static GTY(()) rtx got_helper_rtx = NULL_RTX; -static GTY(()) rtx got_register_rtx = NULL_RTX; static GTY(()) rtx got_symbol_rtx = NULL_RTX; +static GTY(()) rtx got_register_rtx = NULL_RTX; +static GTY(()) rtx got_helper_rtx = NULL_RTX; + +static GTY(()) bool got_helper_needed = false; /* Return the SYMBOL_REF for the Global Offset Table. */ @@ -4258,27 +4260,6 @@ sparc_got (void) return got_symbol_rtx; } -#ifdef HAVE_GAS_HIDDEN -# define USE_HIDDEN_LINKONCE 1 -#else -# define USE_HIDDEN_LINKONCE 0 -#endif - -static void -get_pc_thunk_name (char name[32], unsigned int regno) -{ - const char *reg_name = reg_names[regno]; - - /* Skip the leading '%' as that cannot be used in a - symbol name. */ - reg_name += 1; - - if (USE_HIDDEN_LINKONCE) - sprintf (name, "__sparc_get_pc_thunk.%s", reg_name); - else - ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno); -} - /* Wrapper around the load_pcrel_sym{si,di} patterns. */ static rtx @@ -4298,30 +4279,78 @@ gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2) return insn; } +/* Output the load_pcrel_sym{si,di} patterns. */ + +const char * +output_load_pcrel_sym (rtx *operands) +{ + if (flag_delayed_branch) + { + output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands); + output_asm_insn ("call\t%a2", operands); + output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands); + } + else + { + output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands); + output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands); + output_asm_insn ("call\t%a2", operands); + output_asm_insn (" nop", NULL); + } + + if (operands[2] == got_helper_rtx) + got_helper_needed = true; + + return ""; +} + +#ifdef HAVE_GAS_HIDDEN +# define USE_HIDDEN_LINKONCE 1 +#else +# define USE_HIDDEN_LINKONCE 0 +#endif + /* Emit code to load the GOT register. */ void load_got_register (void) { - if (!got_register_rtx) - got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM); + rtx insn; if (TARGET_VXWORKS_RTP) - emit_insn (gen_vxworks_load_got ()); + { + if (!got_register_rtx) + got_register_rtx = pic_offset_table_rtx; + + insn = gen_vxworks_load_got (); + } else { + if (!got_register_rtx) + got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM); + /* The GOT symbol is subject to a PC-relative relocation so we need a helper function to add the PC value and thus get the final value. */ if (!got_helper_rtx) { char name[32]; - get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM); + + /* Skip the leading '%' as that cannot be used in a symbol name. */ + if (USE_HIDDEN_LINKONCE) + sprintf (name, "__sparc_get_pc_thunk.%s", + reg_names[REGNO (got_register_rtx)] + 1); + else + ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", + REGNO (got_register_rtx)); + got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); } - emit_insn (gen_load_pcrel_sym (got_register_rtx, sparc_got (), - got_helper_rtx)); + insn + = gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx); } + + emit_insn (insn); } /* Ensure that we are not using patterns that are not OK with PIC. */ @@ -4457,7 +4486,7 @@ sparc_pic_register_p (rtx x) return true; if (!HARD_REGISTER_P (pic_offset_table_rtx) - && (HARD_REGISTER_P (x) || lra_in_progress) + && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress) && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) return true; @@ -5486,7 +5515,7 @@ save_local_or_in_reg_p (unsigned int regno, int leaf_function) return true; /* GOT register (%l7) if needed. */ - if (regno == GLOBAL_OFFSET_TABLE_REGNUM && got_register_rtx) + if (got_register_rtx && regno == REGNO (got_register_rtx)) return true; /* If the function accesses prior frames, the frame pointer and the return @@ -12529,10 +12558,9 @@ static void sparc_file_end (void) { /* If we need to emit the special GOT helper function, do so now. */ - if (got_helper_rtx) + if (got_helper_needed) { const char *name = XSTR (got_helper_rtx, 0); - const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM]; #ifdef DWARF2_UNWIND_INFO bool do_cfi; #endif @@ -12569,17 +12597,22 @@ sparc_file_end (void) #ifdef DWARF2_UNWIND_INFO do_cfi = dwarf2out_do_cfi_asm (); if (do_cfi) - fprintf (asm_out_file, "\t.cfi_startproc\n"); + output_asm_insn (".cfi_startproc", NULL); #endif if (flag_delayed_branch) - fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n", - reg_name, reg_name); + { + output_asm_insn ("jmp\t%%o7+8", NULL); + output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx); + } else - fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n", - reg_name, reg_name); + { + output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx); + output_asm_insn ("jmp\t%%o7+8", NULL); + output_asm_insn (" nop", NULL); + } #ifdef DWARF2_UNWIND_INFO if (do_cfi) - fprintf (asm_out_file, "\t.cfi_endproc\n"); + output_asm_insn (".cfi_endproc", NULL); #endif } @@ -13085,7 +13118,10 @@ sparc_init_pic_reg (void) edge entry_edge; rtx_insn *seq; - if (!crtl->uses_pic_offset_table) + /* In PIC mode, we need to always initialize the PIC register if optimization + is enabled, because we are called from IRA and LRA may later force things + to the constant pool for optimization purposes. */ + if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize)) return; start_sequence (); diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 015065ffb81..4b09fc86b78 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -736,6 +736,13 @@ along with GCC; see the file COPYING3. If not see register window instruction in the prologue. */ #define HARD_REGNO_RENAME_OK(FROM, TO) ((FROM) != 1) +/* Select a register mode required for caller save of hard regno REGNO. + Contrary to what is documented, the default is not the smallest suitable + mode but the largest suitable mode for the given (REGNO, NREGS) pair and + it quickly creates paradoxical subregs that can be problematic. */ +#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ + ((MODE) == VOIDmode ? choose_hard_reg_mode (REGNO, NREGS, false) : (MODE)) + /* Specify the registers used for certain standard purposes. The values of these macros are register numbers. */ diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 7af62d599b9..0a6e27ffa83 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -1604,10 +1604,7 @@ (clobber (reg:P O7_REG))] "REGNO (operands[0]) == INTVAL (operands[3])" { - if (flag_delayed_branch) - return "sethi\t%%hi(%a1-4), %0\n\tcall\t%a2\n\t add\t%0, %%lo(%a1+4), %0"; - else - return "sethi\t%%hi(%a1-8), %0\n\tadd\t%0, %%lo(%a1-4), %0\n\tcall\t%a2\n\t nop"; + return output_load_pcrel_sym (operands); } [(set (attr "type") (const_string "multi")) (set (attr "length") diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c index 19bd616d67f..ee5612441e2 100644 --- a/gcc/config/xtensa/xtensa.c +++ b/gcc/config/xtensa/xtensa.c @@ -2865,7 +2865,8 @@ xtensa_expand_prologue (void) gen_rtx_SET (mem, reg)); } } - if (total_size > 1024) + if (total_size > 1024 + || (!callee_save_size && total_size > 128)) { rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); emit_move_insn (tmp_reg, GEN_INT (total_size - |