diff options
Diffstat (limited to 'gcc/config/s390/s390.c')
-rw-r--r-- | gcc/config/s390/s390.c | 409 |
1 files changed, 263 insertions, 146 deletions
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 7556b322669..411b06f7f32 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -67,7 +67,6 @@ static void s390_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); static enum attr_type s390_safe_attr_type (rtx); -static int s390_adjust_cost (rtx, rtx, rtx, int); static int s390_adjust_priority (rtx, int); static int s390_issue_rate (void); static int s390_first_cycle_multipass_dfa_lookahead (void); @@ -121,8 +120,6 @@ static bool s390_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode mode, #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true -#undef TARGET_SCHED_ADJUST_COST -#define TARGET_SCHED_ADJUST_COST s390_adjust_cost #undef TARGET_SCHED_ADJUST_PRIORITY #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority #undef TARGET_SCHED_ISSUE_RATE @@ -257,6 +254,9 @@ struct machine_function GTY(()) /* Literal pool base register. */ rtx base_reg; + /* True if we may need to perform branch splitting. */ + bool split_branches_pending_p; + /* Some local-dynamic TLS symbol name. */ const char *some_ld_name; }; @@ -292,11 +292,13 @@ static void find_constant_pool_ref (rtx, rtx *); static void replace_constant_pool_ref (rtx *, rtx, rtx); static rtx find_ltrel_base (rtx); static void replace_ltrel_base (rtx *); -static void s390_optimize_prologue (bool); +static void s390_optimize_prologue (void); static int find_unused_clobbered_reg (void); static void s390_frame_area (int *, int *); -static void s390_register_info (int, int); -static void s390_frame_info (int, int); +static void s390_register_info (int []); +static void s390_frame_info (void); +static void s390_init_frame_layout (void); +static void s390_update_frame_layout (void); static rtx save_fpr (rtx, int, int); static rtx restore_fpr (rtx, int, int); static rtx save_gprs (rtx, int, int, int); @@ -1057,6 +1059,76 @@ s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword) return true; } +/* Expand logical operator CODE in mode MODE with operands OPERANDS. */ + +void +s390_expand_logical_operator (enum rtx_code code, enum machine_mode mode, + rtx *operands) +{ + enum machine_mode wmode = mode; + rtx dst = operands[0]; + rtx src1 = operands[1]; + rtx src2 = operands[2]; + rtx op, clob, tem; + + /* If we cannot handle the operation directly, use a temp register. */ + if (!s390_logical_operator_ok_p (operands)) + dst = gen_reg_rtx (mode); + + /* QImode and HImode patterns make sense only if we have a destination + in memory. Otherwise perform the operation in SImode. */ + if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM) + wmode = SImode; + + /* Widen operands if required. */ + if (mode != wmode) + { + if (GET_CODE (dst) == SUBREG + && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0) + dst = tem; + else if (REG_P (dst)) + dst = gen_rtx_SUBREG (wmode, dst, 0); + else + dst = gen_reg_rtx (wmode); + + if (GET_CODE (src1) == SUBREG + && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0) + src1 = tem; + else if (GET_MODE (src1) != VOIDmode) + src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0); + + if (GET_CODE (src2) == SUBREG + && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0) + src2 = tem; + else if (GET_MODE (src2) != VOIDmode) + src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0); + } + + /* Emit the instruction. */ + op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2)); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); + + /* Fix up the destination if needed. */ + if (dst != operands[0]) + emit_move_insn (operands[0], gen_lowpart (mode, dst)); +} + +/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */ + +bool +s390_logical_operator_ok_p (rtx *operands) +{ + /* If the destination operand is in memory, it needs to coincide + with one of the source operands. After reload, it has to be + the first source operand. */ + if (GET_CODE (operands[0]) == MEM) + return rtx_equal_p (operands[0], operands[1]) + || (!reload_completed && rtx_equal_p (operands[0], operands[2])); + + return true; +} + /* Change optimizations to be performed, depending on the optimization level. @@ -2065,12 +2137,6 @@ legitimate_reload_constant_p (register rtx op) enum reg_class s390_preferred_reload_class (rtx op, enum reg_class class) { - /* This can happen if a floating point constant is being - reloaded into an integer register. Leave well alone. */ - if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT - && class != FP_REGS) - return class; - switch (GET_CODE (op)) { /* Constants we cannot reload must be forced into the @@ -2114,7 +2180,17 @@ s390_secondary_input_reload_class (enum reg_class class ATTRIBUTE_UNUSED, enum machine_mode mode, rtx in) { if (s390_plus_operand (in, mode)) - return ADDR_REGS; + { + /* ??? Reload sometimes pushes a PLUS reload with a too-large constant. + Until reload is fixed, we need to force_const_mem while emitting the + secondary reload insn -- thus we need to make sure here that we do + have a literal pool for the current function. */ + if (CONSTANT_P (XEXP (in, 1)) + && !legitimate_reload_constant_p (XEXP (in, 1))) + current_function_uses_const_pool = true; + + return ADDR_REGS; + } return NO_REGS; } @@ -2196,6 +2272,10 @@ s390_expand_plus_operand (register rtx target, register rtx src, } if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15) { + /* ??? See comment in s390_secondary_input_reload_class. */ + if (CONSTANT_P (sum2) && !legitimate_reload_constant_p (sum2)) + sum2 = force_const_mem (Pmode, sum2); + emit_move_insn (scratch, sum2); sum2 = scratch; } @@ -4167,41 +4247,6 @@ s390_agen_dep_p (rtx dep_insn, rtx insn) return 0; } -/* Return the modified cost of the dependency of instruction INSN - on instruction DEP_INSN through the link LINK. COST is the - default cost of that dependency. - - Data dependencies are all handled without delay. However, if a - register is modified and subsequently used as base or index - register of a memory reference, at least 4 cycles need to pass - between setting and using the register to avoid pipeline stalls. - An exception is the LA instruction. An address generated by LA can - be used by introducing only a one cycle stall on the pipeline. */ - -static int -s390_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) -{ - /* If the dependence is an anti-dependence, there is no cost. For an - output dependence, there is sometimes a cost, but it doesn't seem - worth handling those few cases. */ - - if (REG_NOTE_KIND (link) != 0) - return 0; - - /* If we can't recognize the insns, we can't really do anything. */ - if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) - return cost; - - /* Operand forward in case of lr, load and la. */ - if (s390_tune == PROCESSOR_2084_Z990 - && cost == 1 - && (s390_safe_attr_type (dep_insn) == TYPE_LA - || s390_safe_attr_type (dep_insn) == TYPE_LR - || s390_safe_attr_type (dep_insn) == TYPE_LOAD)) - return 0; - return cost; -} - /* A C statement (sans semicolon) to update the integer scheduling priority INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier, reduce the priority to execute INSN later. Do not define this macro if @@ -4226,6 +4271,7 @@ s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority) priority = priority << 3; break; case TYPE_STORE: + case TYPE_STM: priority = priority << 1; break; default: @@ -4924,7 +4970,7 @@ s390_mainpool_start (void) } } - if (!pool->pool_insn) + if (!pool->pool_insn && pool->size > 0) abort (); if (pool->size >= 4096) @@ -4947,13 +4993,17 @@ s390_mainpool_start (void) static void s390_mainpool_finish (struct constant_pool *pool) { - rtx base_reg = SET_DEST (PATTERN (pool->pool_insn)); + rtx base_reg = cfun->machine->base_reg; rtx insn; /* If the pool is empty, we're done. */ if (pool->size == 0) { - remove_insn (pool->pool_insn); + /* We don't actually need a base register after all. */ + cfun->machine->base_reg = NULL_RTX; + + if (pool->pool_insn) + remove_insn (pool->pool_insn); s390_free_pool (pool); return; } @@ -5466,20 +5516,16 @@ s390_output_pool_entry (rtx exp, enum machine_mode mode, unsigned int align) /* Rework the prologue/epilogue to avoid saving/restoring - registers unnecessarily. BASE_USED specifies whether - the literal pool base register needs to be saved. */ + registers unnecessarily. */ static void -s390_optimize_prologue (bool base_used) +s390_optimize_prologue (void) { rtx insn, new_insn, next_insn; /* Do a final recompute of the frame-related data. */ - s390_register_info (base_used, cfun_frame_layout.save_return_addr_p); - regs_ever_live[BASE_REGNUM] = base_used; - regs_ever_live[RETURN_REGNUM] = cfun_frame_layout.save_return_addr_p; - regs_ever_live[STACK_POINTER_REGNUM] = cfun_frame_layout.frame_size > 0; + s390_update_frame_layout (); /* If all special registers are in fact used, there's nothing we can do, so no point in walking the insn list. */ @@ -5538,10 +5584,13 @@ s390_optimize_prologue (bool base_used) if (GET_CODE (PATTERN (insn)) == SET && GET_CODE (SET_SRC (PATTERN (insn))) == REG - && REGNO (SET_SRC (PATTERN (insn))) == BASE_REGNUM + && (REGNO (SET_SRC (PATTERN (insn))) == BASE_REGNUM + || (!TARGET_CPU_ZARCH + && REGNO (SET_SRC (PATTERN (insn))) == RETURN_REGNUM)) && GET_CODE (SET_DEST (PATTERN (insn))) == MEM) { set = PATTERN (insn); + first = REGNO (SET_SRC (set)); offset = const0_rtx; base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset); off = INTVAL (offset); @@ -5555,7 +5604,7 @@ s390_optimize_prologue (bool base_used) { new_insn = save_gprs (base, off + (cfun_frame_layout.first_save_gpr - - BASE_REGNUM) * UNITS_PER_WORD, + - first) * UNITS_PER_WORD, cfun_frame_layout.first_save_gpr, cfun_frame_layout.last_save_gpr); new_insn = emit_insn_before (new_insn, insn); @@ -5601,10 +5650,13 @@ s390_optimize_prologue (bool base_used) if (GET_CODE (PATTERN (insn)) == SET && GET_CODE (SET_DEST (PATTERN (insn))) == REG - && REGNO (SET_DEST (PATTERN (insn))) == BASE_REGNUM + && (REGNO (SET_DEST (PATTERN (insn))) == BASE_REGNUM + || (!TARGET_CPU_ZARCH + && REGNO (SET_DEST (PATTERN (insn))) == RETURN_REGNUM)) && GET_CODE (SET_SRC (PATTERN (insn))) == MEM) { set = PATTERN (insn); + first = REGNO (SET_DEST (set)); offset = const0_rtx; base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset); off = INTVAL (offset); @@ -5618,7 +5670,7 @@ s390_optimize_prologue (bool base_used) { new_insn = restore_gprs (base, off + (cfun_frame_layout.first_restore_gpr - - BASE_REGNUM) * UNITS_PER_WORD, + - first) * UNITS_PER_WORD, cfun_frame_layout.first_restore_gpr, cfun_frame_layout.last_restore_gpr); new_insn = emit_insn_before (new_insn, insn); @@ -5636,7 +5688,6 @@ s390_optimize_prologue (bool base_used) static void s390_reorg (void) { - bool base_used = false; bool pool_overflow = false; /* Make sure all splits have been performed; splits after @@ -5709,19 +5760,17 @@ s390_reorg (void) /* If we made it up to here, both conditions are satisfied. Finish up literal pool related changes. */ - if ((pool_overflow || pool->size > 0) - && REGNO (cfun->machine->base_reg) == BASE_REGNUM) - base_used = true; - if (pool_overflow) s390_chunkify_finish (pool); else s390_mainpool_finish (pool); + /* We're done splitting branches. */ + cfun->machine->split_branches_pending_p = false; break; } - s390_optimize_prologue (base_used); + s390_optimize_prologue (); } @@ -5835,14 +5884,12 @@ s390_frame_area (int *area_bottom, int *area_top) *area_top = t; } -/* Fill cfun->machine with info about register usage of current - function. BASE_USED and RETURN_ADDR_USED specify whether we assume the - base and return address register will need to be saved. */ +/* Fill cfun->machine with info about register usage of current function. + Return in LIVE_REGS which GPRs are currently considered live. */ static void -s390_register_info (int base_used, int return_addr_used) +s390_register_info (int live_regs[]) { - int live_regs[16]; int i, j; /* fprs 8 - 15 are call saved for 64 Bit ABI. */ @@ -5866,17 +5913,24 @@ s390_register_info (int base_used, int return_addr_used) live_regs[i] = regs_ever_live[i] && !global_regs[i]; if (flag_pic) - live_regs[PIC_OFFSET_TABLE_REGNUM] = - regs_ever_live[PIC_OFFSET_TABLE_REGNUM]; - - live_regs[BASE_REGNUM] = base_used; - live_regs[RETURN_REGNUM] = return_addr_used; - live_regs[STACK_POINTER_REGNUM] = (!current_function_is_leaf - || TARGET_TPF_PROFILING - || cfun_save_high_fprs_p - || get_frame_size () > 0 - || current_function_calls_alloca - || current_function_stdarg); + live_regs[PIC_OFFSET_TABLE_REGNUM] + = regs_ever_live[PIC_OFFSET_TABLE_REGNUM]; + + live_regs[BASE_REGNUM] + = cfun->machine->base_reg + && REGNO (cfun->machine->base_reg) == BASE_REGNUM; + + live_regs[RETURN_REGNUM] + = cfun->machine->split_branches_pending_p + || cfun_frame_layout.save_return_addr_p; + + live_regs[STACK_POINTER_REGNUM] + = !current_function_is_leaf + || TARGET_TPF_PROFILING + || cfun_save_high_fprs_p + || get_frame_size () > 0 + || current_function_calls_alloca + || current_function_stdarg; for (i = 6; i < 16; i++) if (live_regs[i]) @@ -5924,19 +5978,14 @@ s390_register_info (int base_used, int return_addr_used) cfun_set_fpr_bit (i); } -/* Fill cfun->machine with info about frame of current - function. BASE_USED and RETURN_ADDR_USED specify whether we assume the - base and return address register will need to be saved. */ +/* Fill cfun->machine with info about frame of current function. */ static void -s390_frame_info (int base_used, int return_addr_used) +s390_frame_info (void) { int i; cfun_frame_layout.frame_size = get_frame_size (); - - s390_register_info (base_used, return_addr_used); - if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000) fatal_error ("Total size of local variables exceeds architecture limit."); @@ -6040,37 +6089,136 @@ s390_frame_info (int base_used, int return_addr_used) } } -/* Return offset between argument pointer and frame pointer - initially after prologue. */ +/* Generate frame layout. Fills in register and frame data for the current + function in cfun->machine. This routine can be called multiple times; + it will re-do the complete frame layout every time. */ -HOST_WIDE_INT -s390_arg_frame_offset (void) +static void +s390_init_frame_layout (void) +{ + HOST_WIDE_INT frame_size; + int base_used; + int live_regs[16]; + + /* If return address register is explicitly used, we need to save it. */ + if (regs_ever_live[RETURN_REGNUM] + || !current_function_is_leaf + || TARGET_TPF_PROFILING + || current_function_stdarg + || current_function_calls_eh_return) + cfun_frame_layout.save_return_addr_p = true; + + /* On S/390 machines, we may need to perform branch splitting, which + will require both base and return address register. We have no + choice but to assume we're going to need them until right at the + end of the machine dependent reorg phase. */ + if (!TARGET_CPU_ZARCH) + cfun->machine->split_branches_pending_p = true; + + do + { + frame_size = cfun_frame_layout.frame_size; + + /* Try to predict whether we'll need the base register. */ + base_used = cfun->machine->split_branches_pending_p + || current_function_uses_const_pool + || (!DISP_IN_RANGE (-frame_size) + && !CONST_OK_FOR_CONSTRAINT_P (-frame_size, 'K', "K")); + + /* Decide which register to use as literal pool base. In small + leaf functions, try to use an unused call-clobbered register + as base register to avoid save/restore overhead. */ + if (!base_used) + cfun->machine->base_reg = NULL_RTX; + else if (current_function_is_leaf && !regs_ever_live[5]) + cfun->machine->base_reg = gen_rtx_REG (Pmode, 5); + else + cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM); + + s390_register_info (live_regs); + s390_frame_info (); + } + while (frame_size != cfun_frame_layout.frame_size); +} + +/* Update frame layout. Recompute actual register save data based on + current info and update regs_ever_live for the special registers. + May be called multiple times, but may never cause *more* registers + to be saved than s390_init_frame_layout allocated room for. */ + +static void +s390_update_frame_layout (void) +{ + int live_regs[16]; + + s390_register_info (live_regs); + + regs_ever_live[BASE_REGNUM] = live_regs[BASE_REGNUM]; + regs_ever_live[RETURN_REGNUM] = live_regs[RETURN_REGNUM]; + regs_ever_live[STACK_POINTER_REGNUM] = live_regs[STACK_POINTER_REGNUM]; + + if (cfun->machine->base_reg) + regs_ever_live[REGNO (cfun->machine->base_reg)] = 1; +} + +/* Return true if register FROM can be eliminated via register TO. */ + +bool +s390_can_eliminate (int from, int to) { - /* See the comment in s390_emit_prologue about the assumptions we make - whether or not the base and return address register need to be saved. */ - int return_addr_used = !current_function_is_leaf - || TARGET_TPF_PROFILING - || regs_ever_live[RETURN_REGNUM] - || cfun_frame_layout.save_return_addr_p; + gcc_assert (to == STACK_POINTER_REGNUM + || to == HARD_FRAME_POINTER_REGNUM); + + gcc_assert (from == FRAME_POINTER_REGNUM + || from == ARG_POINTER_REGNUM + || from == RETURN_ADDRESS_POINTER_REGNUM); - s390_frame_info (1, !TARGET_CPU_ZARCH || return_addr_used); + /* Make sure we actually saved the return address. */ + if (from == RETURN_ADDRESS_POINTER_REGNUM) + if (!current_function_calls_eh_return + && !current_function_stdarg + && !cfun_frame_layout.save_return_addr_p) + return false; - return cfun_frame_layout.frame_size + STACK_POINTER_OFFSET; + return true; } -/* Return offset between return address pointer (location of r14 - on the stack) and frame pointer initially after prologue. */ +/* Return offset between register FROM and TO initially after prolog. */ HOST_WIDE_INT -s390_return_address_offset (void) +s390_initial_elimination_offset (int from, int to) { - s390_frame_info (1, 1); + HOST_WIDE_INT offset; + int index; - if (cfun_frame_layout.last_save_gpr < RETURN_REGNUM) - abort (); + /* ??? Why are we called for non-eliminable pairs? */ + if (!s390_can_eliminate (from, to)) + return 0; + + switch (from) + { + case FRAME_POINTER_REGNUM: + offset = 0; + break; + + case ARG_POINTER_REGNUM: + s390_init_frame_layout (); + offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET; + break; + + case RETURN_ADDRESS_POINTER_REGNUM: + s390_init_frame_layout (); + index = RETURN_REGNUM - cfun_frame_layout.first_save_gpr; + gcc_assert (index >= 0); + offset = cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset; + offset += index * UNITS_PER_WORD; + break; + + default: + gcc_unreachable (); + } - return (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset - + (RETURN_REGNUM - cfun_frame_layout.first_save_gpr) * UNITS_PER_WORD); + return offset; } /* Emit insn to save fpr REGNUM at offset OFFSET relative @@ -6259,41 +6407,9 @@ s390_emit_prologue (void) int offset; int next_fpr = 0; - /* At this point, we decide whether we'll need to save/restore the - return address register. This decision is final on zSeries machines; - on S/390 it can still be overridden in s390_split_branches. */ - - if (!current_function_is_leaf - || TARGET_TPF_PROFILING - || regs_ever_live[RETURN_REGNUM]) - cfun_frame_layout.save_return_addr_p = 1; - - /* Decide which register to use as literal pool base. In small leaf - functions, try to use an unused call-clobbered register as base - register to avoid save/restore overhead. */ - - if (current_function_is_leaf && !regs_ever_live[5]) - cfun->machine->base_reg = gen_rtx_REG (Pmode, 5); - else - cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM); - - regs_ever_live[REGNO (cfun->machine->base_reg)] = 1; - - /* Compute frame info. Note that at this point, we assume the base - register and -on S/390- the return register always need to be saved. - This is done because the usage of these registers might change even - after the prologue was emitted. If it turns out later that we really - don't need them, the prologue/epilogue code is modified again. */ - - s390_frame_info (1, !TARGET_CPU_ZARCH - || cfun_frame_layout.save_return_addr_p); - - /* We need to update regs_ever_live to avoid data-flow problems. */ + /* Complete frame layout. */ - regs_ever_live[BASE_REGNUM] = 1; - regs_ever_live[RETURN_REGNUM] = (!TARGET_CPU_ZARCH - || cfun_frame_layout.save_return_addr_p); - regs_ever_live[STACK_POINTER_REGNUM] = cfun_frame_layout.frame_size > 0; + s390_update_frame_layout (); /* Annotate all constant pool references to let the scheduler know they implicitly use the base register. */ @@ -6326,7 +6442,8 @@ s390_emit_prologue (void) /* Dummy insn to mark literal pool slot. */ - emit_insn (gen_main_pool (cfun->machine->base_reg)); + if (cfun->machine->base_reg) + emit_insn (gen_main_pool (cfun->machine->base_reg)); offset = cfun_frame_layout.f0_offset; |