diff options
Diffstat (limited to 'gcc/config/arm/arm.c')
-rw-r--r-- | gcc/config/arm/arm.c | 602 |
1 files changed, 450 insertions, 152 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 9cb272c323e..7b01afb3136 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -40,6 +40,7 @@ #include "function.h" #include "expr.h" #include "optabs.h" +#include "diagnostic-core.h" #include "toplev.h" #include "recog.h" #include "cgraph.h" @@ -3183,13 +3184,82 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, immediate value easier to load. */ enum rtx_code -arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode, - rtx * op1) +arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) { - unsigned HOST_WIDE_INT i = INTVAL (*op1); - unsigned HOST_WIDE_INT maxval; + enum machine_mode mode; + unsigned HOST_WIDE_INT i, maxval; + + mode = GET_MODE (*op0); + if (mode == VOIDmode) + mode = GET_MODE (*op1); + maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1; + /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode + we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either + reversed or (for constant OP1) adjusted to GE/LT. Similarly + for GTU/LEU in Thumb mode. */ + if (mode == DImode) + { + rtx tem; + + /* To keep things simple, always use the Cirrus cfcmp64 if it is + available. */ + if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK) + return code; + + if (code == GT || code == LE + || (!TARGET_ARM && (code == GTU || code == LEU))) + { + /* Missing comparison. First try to use an available + comparison. */ + if (GET_CODE (*op1) == CONST_INT) + { + i = INTVAL (*op1); + switch (code) + { + case GT: + case LE: + if (i != maxval + && arm_const_double_by_immediates (GEN_INT (i + 1))) + { + *op1 = GEN_INT (i + 1); + return code == GT ? GE : LT; + } + break; + case GTU: + case LEU: + if (i != ~((unsigned HOST_WIDE_INT) 0) + && arm_const_double_by_immediates (GEN_INT (i + 1))) + { + *op1 = GEN_INT (i + 1); + return code == GTU ? GEU : LTU; + } + break; + default: + gcc_unreachable (); + } + } + + /* If that did not work, reverse the condition. */ + tem = *op0; + *op0 = *op1; + *op1 = tem; + return swap_condition (code); + } + + return code; + } + + /* Comparisons smaller than DImode. Only adjust comparisons against + an out-of-range constant. */ + if (GET_CODE (*op1) != CONST_INT + || const_ok_for_arm (INTVAL (*op1)) + || const_ok_for_arm (- INTVAL (*op1))) + return code; + + i = INTVAL (*op1); + switch (code) { case EQ: @@ -3466,7 +3536,7 @@ arm_return_in_memory (const_tree type, const_tree fntype) have been created by C++. */ for (field = TYPE_FIELDS (type); field && TREE_CODE (field) != FIELD_DECL; - field = TREE_CHAIN (field)) + field = DECL_CHAIN (field)) continue; if (field == NULL) @@ -3485,9 +3555,9 @@ arm_return_in_memory (const_tree type, const_tree fntype) /* Now check the remaining fields, if any. Only bitfields are allowed, since they are not addressable. */ - for (field = TREE_CHAIN (field); + for (field = DECL_CHAIN (field); field; - field = TREE_CHAIN (field)) + field = DECL_CHAIN (field)) { if (TREE_CODE (field) != FIELD_DECL) continue; @@ -3507,7 +3577,7 @@ arm_return_in_memory (const_tree type, const_tree fntype) integral, or can be returned in an integer register. */ for (field = TYPE_FIELDS (type); field; - field = TREE_CHAIN (field)) + field = DECL_CHAIN (field)) { if (TREE_CODE (field) != FIELD_DECL) continue; @@ -3767,7 +3837,7 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) if (!COMPLETE_TYPE_P(type)) return -1; - for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) { if (TREE_CODE (field) != FIELD_DECL) continue; @@ -3799,7 +3869,7 @@ aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) if (!COMPLETE_TYPE_P(type)) return -1; - for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) { if (TREE_CODE (field) != FIELD_DECL) continue; @@ -6214,6 +6284,7 @@ static inline int thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) { enum machine_mode mode = GET_MODE (x); + int total; switch (code) { @@ -6312,24 +6383,20 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) return 14; return 2; + case SIGN_EXTEND: case ZERO_EXTEND: - /* XXX still guessing. */ - switch (GET_MODE (XEXP (x, 0))) - { - case QImode: - return (1 + (mode == DImode ? 4 : 0) - + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); + total = mode == DImode ? COSTS_N_INSNS (1) : 0; + total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code); - case HImode: - return (4 + (mode == DImode ? 4 : 0) - + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); + if (mode == SImode) + return total; - case SImode: - return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); + if (arm_arch6) + return total + COSTS_N_INSNS (1); - default: - return 99; - } + /* Assume a two-shift sequence. Increase the cost slightly so + we prefer actual shifts over an extend operation. */ + return total + 1 + COSTS_N_INSNS (2); default: return 99; @@ -6798,44 +6865,39 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) return false; case SIGN_EXTEND: - if (GET_MODE_CLASS (mode) == MODE_INT) - { - *total = 0; - if (mode == DImode) - *total += COSTS_N_INSNS (1); - - if (GET_MODE (XEXP (x, 0)) != SImode) - { - if (arm_arch6) - { - if (GET_CODE (XEXP (x, 0)) != MEM) - *total += COSTS_N_INSNS (1); - } - else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM) - *total += COSTS_N_INSNS (2); - } - - return false; - } - - /* Fall through */ case ZERO_EXTEND: *total = 0; if (GET_MODE_CLASS (mode) == MODE_INT) { + rtx op = XEXP (x, 0); + enum machine_mode opmode = GET_MODE (op); + if (mode == DImode) *total += COSTS_N_INSNS (1); - if (GET_MODE (XEXP (x, 0)) != SImode) + if (opmode != SImode) { - if (arm_arch6) + if (MEM_P (op)) { - if (GET_CODE (XEXP (x, 0)) != MEM) - *total += COSTS_N_INSNS (1); + /* If !arm_arch4, we use one of the extendhisi2_mem + or movhi_bytes patterns for HImode. For a QImode + sign extension, we first zero-extend from memory + and then perform a shift sequence. */ + if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND)) + *total += COSTS_N_INSNS (2); } - else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM) - *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ? - 1 : 2); + else if (arm_arch6) + *total += COSTS_N_INSNS (1); + + /* We don't have the necessary insn, so we need to perform some + other operation. */ + else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode) + /* An and with constant 255. */ + *total += COSTS_N_INSNS (1); + else + /* A shift sequence. Increase costs slightly to avoid + combining two shifts into an extend operation. */ + *total += COSTS_N_INSNS (2) + 1; } return false; @@ -7191,41 +7253,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, return false; case SIGN_EXTEND: - *total = 0; - if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4) - { - if (!(arm_arch4 && MEM_P (XEXP (x, 0)))) - *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2); - } - if (mode == DImode) - *total += COSTS_N_INSNS (1); - return false; - case ZERO_EXTEND: - *total = 0; - if (!(arm_arch4 && MEM_P (XEXP (x, 0)))) - { - switch (GET_MODE (XEXP (x, 0))) - { - case QImode: - *total += COSTS_N_INSNS (1); - break; - - case HImode: - *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2); - - case SImode: - break; - - default: - *total += COSTS_N_INSNS (2); - } - } - - if (mode == DImode) - *total += COSTS_N_INSNS (1); - - return false; + return arm_rtx_costs_1 (x, outer_code, total, 0); case CONST_INT: if (const_ok_for_arm (INTVAL (x))) @@ -8250,8 +8279,7 @@ neon_vdup_constant (rtx vals) load. */ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); - return gen_rtx_UNSPEC (mode, gen_rtvec (1, x), - UNSPEC_VDUP_N); + return gen_rtx_VEC_DUPLICATE (mode, x); } /* Generate code to load VALS, which is a PARALLEL containing only @@ -8347,8 +8375,7 @@ neon_expand_vector_init (rtx target, rtx vals) { x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); emit_insn (gen_rtx_SET (VOIDmode, target, - gen_rtx_UNSPEC (mode, gen_rtvec (1, x), - UNSPEC_VDUP_N))); + gen_rtx_VEC_DUPLICATE (mode, x))); return; } @@ -8357,7 +8384,7 @@ neon_expand_vector_init (rtx target, rtx vals) if (n_var == 1) { rtx copy = copy_rtx (vals); - rtvec ops; + rtx index = GEN_INT (one_var); /* Load constant part of vector, substitute neighboring value for varying element. */ @@ -8366,9 +8393,38 @@ neon_expand_vector_init (rtx target, rtx vals) /* Insert variable. */ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); - ops = gen_rtvec (3, x, target, GEN_INT (one_var)); - emit_insn (gen_rtx_SET (VOIDmode, target, - gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE))); + switch (mode) + { + case V8QImode: + emit_insn (gen_neon_vset_lanev8qi (target, x, target, index)); + break; + case V16QImode: + emit_insn (gen_neon_vset_lanev16qi (target, x, target, index)); + break; + case V4HImode: + emit_insn (gen_neon_vset_lanev4hi (target, x, target, index)); + break; + case V8HImode: + emit_insn (gen_neon_vset_lanev8hi (target, x, target, index)); + break; + case V2SImode: + emit_insn (gen_neon_vset_lanev2si (target, x, target, index)); + break; + case V4SImode: + emit_insn (gen_neon_vset_lanev4si (target, x, target, index)); + break; + case V2SFmode: + emit_insn (gen_neon_vset_lanev2sf (target, x, target, index)); + break; + case V4SFmode: + emit_insn (gen_neon_vset_lanev4sf (target, x, target, index)); + break; + case V2DImode: + emit_insn (gen_neon_vset_lanev2di (target, x, target, index)); + break; + default: + gcc_unreachable (); + } return; } @@ -10028,6 +10084,55 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) return CC_Cmode; + if (GET_MODE (x) == DImode || GET_MODE (y) == DImode) + { + /* To keep things simple, always use the Cirrus cfcmp64 if it is + available. */ + if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK) + return CCmode; + + switch (op) + { + case EQ: + case NE: + /* A DImode comparison against zero can be implemented by + or'ing the two halves together. */ + if (y == const0_rtx) + return CC_Zmode; + + /* We can do an equality test in three Thumb instructions. */ + if (!TARGET_ARM) + return CC_Zmode; + + /* FALLTHROUGH */ + + case LTU: + case LEU: + case GTU: + case GEU: + /* DImode unsigned comparisons can be implemented by cmp + + cmpeq without a scratch register. Not worth doing in + Thumb-2. */ + if (TARGET_ARM) + return CC_CZmode; + + /* FALLTHROUGH */ + + case LT: + case LE: + case GT: + case GE: + /* DImode signed and unsigned comparisons can be implemented + by cmp + sbcs with a scratch register, but that does not + set the Z flag - we must reverse GT/LE/GTU/LEU. */ + gcc_assert (op != EQ && op != NE); + return CC_NCVmode; + + default: + gcc_unreachable (); + } + } + return CCmode; } @@ -10037,10 +10142,39 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) rtx arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y) { - enum machine_mode mode = SELECT_CC_MODE (code, x, y); - rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); + enum machine_mode mode; + rtx cc_reg; + int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode; + + /* We might have X as a constant, Y as a register because of the predicates + used for cmpdi. If so, force X to a register here. */ + if (dimode_comparison && !REG_P (x)) + x = force_reg (DImode, x); - emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + mode = SELECT_CC_MODE (code, x, y); + cc_reg = gen_rtx_REG (mode, CC_REGNUM); + + if (dimode_comparison + && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) + && mode != CC_CZmode) + { + rtx clobber, set; + + /* To compare two non-zero values for equality, XOR them and + then compare against zero. Not used for ARM mode; there + CC_CZmode is cheaper. */ + if (mode == CC_Zmode && y != const0_rtx) + { + x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN); + y = const0_rtx; + } + /* A scratch register is required. */ + clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode)); + set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); + } + else + emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); return cc_reg; } @@ -11369,6 +11503,34 @@ arm_const_double_by_parts (rtx val) return false; } +/* Return true if it is possible to inline both the high and low parts + of a 64-bit constant into 32-bit data processing instructions. */ +bool +arm_const_double_by_immediates (rtx val) +{ + enum machine_mode mode = GET_MODE (val); + rtx part; + + if (mode == VOIDmode) + mode = DImode; + + part = gen_highpart_mode (SImode, mode, val); + + gcc_assert (GET_CODE (part) == CONST_INT); + + if (!const_ok_for_arm (INTVAL (part))) + return false; + + part = gen_lowpart (SImode, val); + + gcc_assert (GET_CODE (part) == CONST_INT); + + if (!const_ok_for_arm (INTVAL (part))) + return false; + + return true; +} + /* Scan INSN and note any of its operands that need fixing. If DO_PUSHES is false we do not actually push any of the fixups needed. The function returns TRUE if any fixups were needed/pushed. @@ -12698,6 +12860,56 @@ output_move_neon (rtx *operands) return ""; } +/* Compute and return the length of neon_mov<mode>, where <mode> is + one of VSTRUCT modes: EI, OI, CI or XI. */ +int +arm_attr_length_move_neon (rtx insn) +{ + rtx reg, mem, addr; + int load; + enum machine_mode mode; + + extract_insn_cached (insn); + + if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) + { + mode = GET_MODE (recog_data.operand[0]); + switch (mode) + { + case EImode: + case OImode: + return 8; + case CImode: + return 12; + case XImode: + return 16; + default: + gcc_unreachable (); + } + } + + load = REG_P (recog_data.operand[0]); + reg = recog_data.operand[!load]; + mem = recog_data.operand[load]; + + gcc_assert (MEM_P (mem)); + + mode = GET_MODE (reg); + addr = XEXP (mem, 0); + + /* Strip off const from addresses like (const (plus (...))). */ + if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS) + { + int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2; + return insns * 4; + } + else + return 4; +} + /* Output an ADD r, s, #n where n may be too big for one instruction. If adding zero to one register, output nothing. */ const char * @@ -14479,7 +14691,8 @@ arm_get_frame_offsets (void) generates better code on Thumb-2 by avoiding the need to use 32-bit push/pop instructions. */ if (!crtl->tail_call_emit - && arm_size_return_regs () <= 12) + && arm_size_return_regs () <= 12 + && (offsets->saved_regs_mask & (1 << 3)) == 0) { reg = 3; } @@ -15270,8 +15483,18 @@ arm_print_operand (FILE *stream, rtx x, int code) the value being loaded is big-wordian or little-wordian. The order of the two register loads can matter however, if the address of the memory location is actually held in one of the registers - being overwritten by the load. */ + being overwritten by the load. + + The 'Q' and 'R' constraints are also available for 64-bit + constants. */ case 'Q': + if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) + { + rtx part = gen_lowpart (SImode, x); + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part)); + return; + } + if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) { output_operand_lossage ("invalid operand for code '%c'", code); @@ -15282,6 +15505,18 @@ arm_print_operand (FILE *stream, rtx x, int code) return; case 'R': + if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) + { + enum machine_mode mode = GET_MODE (x); + rtx part; + + if (mode == VOIDmode) + mode = DImode; + part = gen_highpart_mode (SImode, mode, x); + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part)); + return; + } + if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) { output_operand_lossage ("invalid operand for code '%c'", code); @@ -16102,11 +16337,33 @@ get_arm_condition_code (rtx comparison) case CC_Cmode: switch (comp_code) - { - case LTU: return ARM_CS; - case GEU: return ARM_CC; - default: gcc_unreachable (); - } + { + case LTU: return ARM_CS; + case GEU: return ARM_CC; + default: gcc_unreachable (); + } + + case CC_CZmode: + switch (comp_code) + { + case NE: return ARM_NE; + case EQ: return ARM_EQ; + case GEU: return ARM_CS; + case GTU: return ARM_HI; + case LEU: return ARM_LS; + case LTU: return ARM_CC; + default: gcc_unreachable (); + } + + case CC_NCVmode: + switch (comp_code) + { + case GE: return ARM_GE; + case LT: return ARM_LT; + case GEU: return ARM_CS; + case LTU: return ARM_CC; + default: gcc_unreachable (); + } case CCmode: switch (comp_code) @@ -19309,6 +19566,81 @@ is_called_in_ARM_mode (tree func) #endif } +/* Given the stack offsets and register mask in OFFSETS, decide how + many additional registers to push instead of subtracting a constant + from SP. For epilogues the principle is the same except we use pop. + FOR_PROLOGUE indicates which we're generating. */ +static int +thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue) +{ + HOST_WIDE_INT amount; + unsigned long live_regs_mask = offsets->saved_regs_mask; + /* Extract a mask of the ones we can give to the Thumb's push/pop + instruction. */ + unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff); + /* Then count how many other high registers will need to be pushed. */ + unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + int n_free, reg_base; + + if (!for_prologue && frame_pointer_needed) + amount = offsets->locals_base - offsets->saved_regs; + else + amount = offsets->outgoing_args - offsets->saved_regs; + + /* If the stack frame size is 512 exactly, we can save one load + instruction, which should make this a win even when optimizing + for speed. */ + if (!optimize_size && amount != 512) + return 0; + + /* Can't do this if there are high registers to push. */ + if (high_regs_pushed != 0) + return 0; + + /* Shouldn't do it in the prologue if no registers would normally + be pushed at all. In the epilogue, also allow it if we'll have + a pop insn for the PC. */ + if (l_mask == 0 + && (for_prologue + || TARGET_BACKTRACE + || (live_regs_mask & 1 << LR_REGNUM) == 0 + || TARGET_INTERWORK + || crtl->args.pretend_args_size != 0)) + return 0; + + /* Don't do this if thumb_expand_prologue wants to emit instructions + between the push and the stack frame allocation. */ + if (for_prologue + && ((flag_pic && arm_pic_register != INVALID_REGNUM) + || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))) + return 0; + + reg_base = 0; + n_free = 0; + if (!for_prologue) + { + reg_base = arm_size_return_regs () / UNITS_PER_WORD; + live_regs_mask >>= reg_base; + } + + while (reg_base + n_free < 8 && !(live_regs_mask & 1) + && (for_prologue || call_used_regs[reg_base + n_free])) + { + live_regs_mask >>= 1; + n_free++; + } + + if (n_free == 0) + return 0; + gcc_assert (amount / 4 * 4 == amount); + + if (amount >= 512 && (amount - n_free * 4) < 512) + return (amount - 508) / 4; + if (amount <= n_free * 4) + return amount / 4; + return 0; +} + /* The bits which aren't usefully expanded as rtl. */ const char * thumb_unexpanded_epilogue (void) @@ -19317,6 +19649,7 @@ thumb_unexpanded_epilogue (void) int regno; unsigned long live_regs_mask = 0; int high_regs_pushed = 0; + int extra_pop; int had_to_push_lr; int size; @@ -19336,6 +19669,13 @@ thumb_unexpanded_epilogue (void) the register is used to hold a return value. */ size = arm_size_return_regs (); + extra_pop = thumb1_extra_regs_pushed (offsets, false); + if (extra_pop > 0) + { + unsigned long extra_mask = (1 << extra_pop) - 1; + live_regs_mask |= extra_mask << (size / UNITS_PER_WORD); + } + /* The prolog may have pushed some high registers to use as work registers. e.g. the testsuite file: gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c @@ -19419,7 +19759,9 @@ thumb_unexpanded_epilogue (void) live_regs_mask); /* We have either just popped the return address into the - PC or it is was kept in LR for the entire function. */ + PC or it is was kept in LR for the entire function. + Note that thumb_pushpop has already called thumb_exit if the + PC was in the list. */ if (!had_to_push_lr) thumb_exit (asm_out_file, LR_REGNUM); } @@ -19565,51 +19907,6 @@ thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to) } } -/* Given the stack offsets and register mask in OFFSETS, decide - how many additional registers to push instead of subtracting - a constant from SP. */ -static int -thumb1_extra_regs_pushed (arm_stack_offsets *offsets) -{ - HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs; - unsigned long live_regs_mask = offsets->saved_regs_mask; - /* Extract a mask of the ones we can give to the Thumb's push instruction. */ - unsigned long l_mask = live_regs_mask & 0x40ff; - /* Then count how many other high registers will need to be pushed. */ - unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00); - int n_free; - - /* If the stack frame size is 512 exactly, we can save one load - instruction, which should make this a win even when optimizing - for speed. */ - if (!optimize_size && amount != 512) - return 0; - - /* Can't do this if there are high registers to push, or if we - are not going to do a push at all. */ - if (high_regs_pushed != 0 || l_mask == 0) - return 0; - - /* Don't do this if thumb1_expand_prologue wants to emit instructions - between the push and the stack frame allocation. */ - if ((flag_pic && arm_pic_register != INVALID_REGNUM) - || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)) - return 0; - - for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1) - n_free++; - - if (n_free == 0) - return 0; - gcc_assert (amount / 4 * 4 == amount); - - if (amount >= 512 && (amount - n_free * 4) < 512) - return (amount - 508) / 4; - if (amount <= n_free * 4) - return amount / 4; - return 0; -} - /* Generate the rest of a function's prologue. */ void thumb1_expand_prologue (void) @@ -19646,7 +19943,7 @@ thumb1_expand_prologue (void) stack_pointer_rtx); amount = offsets->outgoing_args - offsets->saved_regs; - amount -= 4 * thumb1_extra_regs_pushed (offsets); + amount -= 4 * thumb1_extra_regs_pushed (offsets, true); if (amount) { if (amount < 512) @@ -19731,6 +20028,7 @@ thumb1_expand_epilogue (void) emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); amount = offsets->locals_base - offsets->saved_regs; } + amount -= 4 * thumb1_extra_regs_pushed (offsets, false); gcc_assert (amount >= 0); if (amount) @@ -19953,7 +20251,7 @@ thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED) || (high_regs_pushed == 0 && l_mask)) { unsigned long mask = l_mask; - mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1; + mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1; thumb_pushpop (f, mask, 1, &cfa_offset, mask); } |