about summary refs log tree commit diff
diff options
context:
space:
mode:
author Yvan Roux <yvan.roux@linaro.org> 2016-09-06 14:28:23 +0200
committer Yvan Roux <yvan.roux@linaro.org> 2016-09-07 22:09:02 +0200
commit 4825254bc87d4915a510c0d83dff86c0433fd947 (patch)
tree 5f1d1300d5833d4806534739e803d7e3db4d1e13
parent e23342a302f457b8045e0ba70388ed9a816cda2c (diff)
gcc/
Backport from trunk r238960.
2016-08-01 Wilco Dijkstra <wdijkstr@arm.com>

* config/aarch64/aarch64.h (aarch64_frame): Remove padding0 and hardfp_offset. Add locals_offset, initial_adjust, callee_adjust, callee_offset and final_adjust.
* config/aarch64/aarch64.c (aarch64_layout_frame): Remove unused padding0 and hardfp_offset initializations. Choose frame layout and set frame variables accordingly. Use INVALID_REGNUM instead of FIRST_PSEUDO_REGISTER.
(aarch64_push_regs): Use INVALID_REGNUM, not FIRST_PSEUDO_REGISTER.
(aarch64_pop_regs): Likewise.
(aarch64_expand_prologue): Remove all decision code, just emit prolog according to frame variables.
(aarch64_expand_epilogue): Remove all decision code, just emit epilog according to frame variables.
(aarch64_initial_elimination_offset): Use offset to local/arg area.

gcc/testsuite/
Backport from trunk r238960.
2016-08-01 Wilco Dijkstra <wdijkstr@arm.com>

* gcc.target/aarch64/test_frame_10.c: Fix test to check for a single stack adjustment, no writeback.
* gcc.target/aarch64/test_frame_12.c: Likewise.
* gcc.target/aarch64/test_frame_13.c: Likewise.
* gcc.target/aarch64/test_frame_15.c: Likewise.
* gcc.target/aarch64/test_frame_6.c: Likewise.
* gcc.target/aarch64/test_frame_7.c: Likewise.
* gcc.target/aarch64/test_frame_8.c: Likewise.
* gcc.target/aarch64/test_frame_16.c: New test.

Change-Id: Id4fbbe13420b85d8cd466c4ae673206b799e7adc
-rw-r--r-- gcc/config/aarch64/aarch64.c | 359
-rw-r--r-- gcc/config/aarch64/aarch64.h | 28
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 7
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/test_frame_12.c | 4
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/test_frame_13.c | 5
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/test_frame_15.c | 5
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/test_frame_16.c | 25
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/test_frame_6.c | 8
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/test_frame_7.c | 7
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/test_frame_8.c | 4
10 files changed, 231 insertions, 221 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3bfb8dd53b1..17e36f239ae 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2694,8 +2694,8 @@ aarch64_layout_frame (void)
#define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED (-1)
- cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
- cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
+ cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
+ cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
/* First mark all the registers that really need to be saved... */
for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
@@ -2729,7 +2729,6 @@ aarch64_layout_frame (void)
cfun->machine->frame.wb_candidate1 = R29_REGNUM;
cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
cfun->machine->frame.wb_candidate2 = R30_REGNUM;
- cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
offset += 2 * UNITS_PER_WORD;
}
@@ -2738,9 +2737,9 @@ aarch64_layout_frame (void)
if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
{
cfun->machine->frame.reg_offset[regno] = offset;
- if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
+ if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
cfun->machine->frame.wb_candidate1 = regno;
- else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
+ else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
cfun->machine->frame.wb_candidate2 = regno;
offset += UNITS_PER_WORD;
}
@@ -2749,24 +2748,23 @@ aarch64_layout_frame (void)
if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
{
cfun->machine->frame.reg_offset[regno] = offset;
- if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
+ if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
cfun->machine->frame.wb_candidate1 = regno;
- else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
+ else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
&& cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
cfun->machine->frame.wb_candidate2 = regno;
offset += UNITS_PER_WORD;
}
- cfun->machine->frame.padding0 =
- (ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
cfun->machine->frame.saved_regs_size = offset;
+ HOST_WIDE_INT varargs_and_saved_regs_size
+ = offset + cfun->machine->frame.saved_varargs_size;
+
cfun->machine->frame.hard_fp_offset
- = ROUND_UP (cfun->machine->frame.saved_varargs_size
- + get_frame_size ()
- + cfun->machine->frame.saved_regs_size,
+ = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
STACK_BOUNDARY / BITS_PER_UNIT);
cfun->machine->frame.frame_size
@@ -2774,6 +2772,77 @@ aarch64_layout_frame (void)
+ crtl->outgoing_args_size,
STACK_BOUNDARY / BITS_PER_UNIT);
+ cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
+
+ cfun->machine->frame.initial_adjust = 0;
+ cfun->machine->frame.final_adjust = 0;
+ cfun->machine->frame.callee_adjust = 0;
+ cfun->machine->frame.callee_offset = 0;
+
+ HOST_WIDE_INT max_push_offset = 0;
+ if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
+ max_push_offset = 512;
+ else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
+ max_push_offset = 256;
+
+ if (cfun->machine->frame.frame_size < max_push_offset
+ && crtl->outgoing_args_size == 0)
+ {
+ /* Simple, small frame with no outgoing arguments:
+ stp reg1, reg2, [sp, -frame_size]!
+ stp reg3, reg4, [sp, 16] */
+ cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
+ }
+ else if ((crtl->outgoing_args_size
+ + cfun->machine->frame.saved_regs_size < 512)
+ && !(cfun->calls_alloca
+ && cfun->machine->frame.hard_fp_offset < max_push_offset))
+ {
+ /* Frame with small outgoing arguments:
+ sub sp, sp, frame_size
+ stp reg1, reg2, [sp, outgoing_args_size]
+ stp reg3, reg4, [sp, outgoing_args_size + 16] */
+ cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
+ cfun->machine->frame.callee_offset
+ = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
+ }
+ else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
+ {
+ /* Frame with large outgoing arguments but a small local area:
+ stp reg1, reg2, [sp, -hard_fp_offset]!
+ stp reg3, reg4, [sp, 16]
+ sub sp, sp, outgoing_args_size */
+ cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
+ cfun->machine->frame.final_adjust
+ = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
+ }
+ else if (!frame_pointer_needed
+ && varargs_and_saved_regs_size < max_push_offset)
+ {
+ /* Frame with large local area and outgoing arguments (this pushes the
+ callee-saves first, followed by the locals and outgoing area):
+ stp reg1, reg2, [sp, -varargs_and_saved_regs_size]!
+ stp reg3, reg4, [sp, 16]
+ sub sp, sp, frame_size - varargs_and_saved_regs_size */
+ cfun->machine->frame.callee_adjust = varargs_and_saved_regs_size;
+ cfun->machine->frame.final_adjust
+ = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
+ cfun->machine->frame.hard_fp_offset = cfun->machine->frame.callee_adjust;
+ cfun->machine->frame.locals_offset = cfun->machine->frame.hard_fp_offset;
+ }
+ else
+ {
+ /* Frame with large local area and outgoing arguments using frame pointer:
+ sub sp, sp, hard_fp_offset
+ stp x29, x30, [sp, 0]
+ add x29, sp, 0
+ stp reg3, reg4, [sp, 16]
+ sub sp, sp, outgoing_args_size */
+ cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
+ cfun->machine->frame.final_adjust
+ = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
+ }
+
cfun->machine->frame.laid_out = true;
}
@@ -2832,7 +2901,7 @@ aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
rtx_insn *insn;
machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
- if (regno2 == FIRST_PSEUDO_REGISTER)
+ if (regno2 == INVALID_REGNUM)
return aarch64_pushwb_single_reg (mode, regno1, adjustment);
rtx reg1 = gen_rtx_REG (mode, regno1);
@@ -2871,7 +2940,7 @@ aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
- if (regno2 == FIRST_PSEUDO_REGISTER)
+ if (regno2 == INVALID_REGNUM)
{
rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
@@ -3072,23 +3141,16 @@ aarch64_restore_callee_saves (machine_mode mode,
void
aarch64_expand_prologue (void)
{
- /* sub sp, sp, #<frame_size>
- stp {fp, lr}, [sp, #<frame_size> - 16]
- add fp, sp, #<frame_size> - hardfp_offset
- stp {cs_reg}, [fp, #-16] etc.
-
- sub sp, sp, <final_adjustment_if_any>
- */
- HOST_WIDE_INT frame_size, offset;
- HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
- HOST_WIDE_INT hard_fp_offset;
- rtx_insn *insn;
-
aarch64_layout_frame ();
- offset = frame_size = cfun->machine->frame.frame_size;
- hard_fp_offset = cfun->machine->frame.hard_fp_offset;
- fp_offset = frame_size - hard_fp_offset;
+ HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
+ HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
+ HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
+ HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
+ HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
+ unsigned reg1 = cfun->machine->frame.wb_candidate1;
+ unsigned reg2 = cfun->machine->frame.wb_candidate2;
+ rtx_insn *insn;
if (flag_stack_usage_info)
current_function_static_stack_size = frame_size;
@@ -3105,94 +3167,29 @@ aarch64_expand_prologue (void)
aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
}
- /* Store pairs and load pairs have a range only -512 to 504. */
- if (offset >= 512)
- {
- /* When the frame has a large size, an initial decrease is done on
- the stack pointer to jump over the callee-allocated save area for
- register varargs, the local variable area and/or the callee-saved
- register area. This will allow the pre-index write-back
- store pair instructions to be used for setting up the stack frame
- efficiently. */
- offset = hard_fp_offset;
- if (offset >= 512)
- offset = cfun->machine->frame.saved_regs_size;
-
- frame_size -= (offset + crtl->outgoing_args_size);
- fp_offset = 0;
+ aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, -initial_adjust, true);
- aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, -frame_size, true);
- }
- else
- frame_size = -1;
+ if (callee_adjust != 0)
+ aarch64_push_regs (reg1, reg2, callee_adjust);
- if (offset > 0)
+ if (frame_pointer_needed)
{
- bool skip_wb = false;
-
- if (frame_pointer_needed)
- {
- skip_wb = true;
-
- if (fp_offset)
- {
- insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
- GEN_INT (-offset)));
- RTX_FRAME_RELATED_P (insn) = 1;
-
- aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
- R30_REGNUM, false);
- }
- else
- aarch64_push_regs (R29_REGNUM, R30_REGNUM, offset);
-
- /* Set up frame pointer to point to the location of the
- previous frame pointer on the stack. */
- insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (fp_offset)));
- RTX_FRAME_RELATED_P (insn) = 1;
- emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
- }
- else
- {
- unsigned reg1 = cfun->machine->frame.wb_candidate1;
- unsigned reg2 = cfun->machine->frame.wb_candidate2;
-
- if (fp_offset
- || reg1 == FIRST_PSEUDO_REGISTER
- || (reg2 == FIRST_PSEUDO_REGISTER
- && offset >= 256))
- {
- insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
- GEN_INT (-offset)));
- RTX_FRAME_RELATED_P (insn) = 1;
- }
- else
- {
- aarch64_push_regs (reg1, reg2, offset);
- skip_wb = true;
- }
- }
-
- aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
- skip_wb);
- aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
- skip_wb);
+ if (callee_adjust == 0)
+ aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
+ R30_REGNUM, false);
+ insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (callee_offset)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
}
- /* when offset >= 512,
- sub sp, sp, #<outgoing_args_size> */
- if (frame_size > -1)
- {
- if (crtl->outgoing_args_size > 0)
- {
- insn = emit_insn (gen_add2_insn
- (stack_pointer_rtx,
- GEN_INT (- crtl->outgoing_args_size)));
- RTX_FRAME_RELATED_P (insn) = 1;
- }
- }
+ aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
+ callee_adjust != 0 || frame_pointer_needed);
+ aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
+ callee_adjust != 0 || frame_pointer_needed);
+ aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, -final_adjust,
+ !frame_pointer_needed);
}
/* Return TRUE if we can use a simple_return insn.
@@ -3215,104 +3212,80 @@ aarch64_use_return_insn_p (void)
return cfun->machine->frame.frame_size == 0;
}
-/* Generate the epilogue instructions for returning from a function. */
+/* Generate the epilogue instructions for returning from a function.
+ This is almost exactly the reverse of the prolog sequence, except
+ that we need to insert barriers to avoid scheduling loads that read
+ from a deallocated stack, and we optimize the unwind records by
+ emitting them all together if possible. */
void
aarch64_expand_epilogue (bool for_sibcall)
{
- HOST_WIDE_INT frame_size, offset;
- HOST_WIDE_INT fp_offset;
- HOST_WIDE_INT hard_fp_offset;
- rtx_insn *insn;
- /* We need to add memory barrier to prevent read from deallocated stack. */
- bool need_barrier_p = (get_frame_size () != 0
- || cfun->machine->frame.saved_varargs_size);
-
aarch64_layout_frame ();
- offset = frame_size = cfun->machine->frame.frame_size;
- hard_fp_offset = cfun->machine->frame.hard_fp_offset;
- fp_offset = frame_size - hard_fp_offset;
+ HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
+ HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
+ HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
+ HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
+ unsigned reg1 = cfun->machine->frame.wb_candidate1;
+ unsigned reg2 = cfun->machine->frame.wb_candidate2;
+ rtx cfi_ops = NULL;
+ rtx_insn *insn;
- /* Store pairs and load pairs have a range only -512 to 504. */
- if (offset >= 512)
- {
- offset = hard_fp_offset;
- if (offset >= 512)
- offset = cfun->machine->frame.saved_regs_size;
+ /* We need to add memory barrier to prevent read from deallocated stack. */
+ bool need_barrier_p = (get_frame_size ()
+ + cfun->machine->frame.saved_varargs_size) != 0;
- frame_size -= (offset + crtl->outgoing_args_size);
- fp_offset = 0;
- if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
- {
- insn = emit_insn (gen_add2_insn
- (stack_pointer_rtx,
- GEN_INT (crtl->outgoing_args_size)));
- RTX_FRAME_RELATED_P (insn) = 1;
- }
+ /* Emit a barrier to prevent loads from a deallocated stack. */
+ if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+ {
+ emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+ need_barrier_p = false;
}
- else
- frame_size = -1;
- /* If there were outgoing arguments or we've done dynamic stack
- allocation, then restore the stack pointer from the frame
- pointer. This is at most one insn and more efficient than using
- GCC's internal mechanism. */
- if (frame_pointer_needed
- && (crtl->outgoing_args_size || cfun->calls_alloca))
+ /* Restore the stack pointer from the frame pointer if it may not
+ be the same as the stack pointer. */
+ if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
{
- if (cfun->calls_alloca)
- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
-
insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
hard_frame_pointer_rtx,
- GEN_INT (0)));
- offset = offset - fp_offset;
+ GEN_INT (-callee_offset)));
+ /* If writeback is used when restoring callee-saves, the CFA
+ is restored on the instruction doing the writeback. */
+ RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
}
+ else
+ aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, final_adjust, true);
- if (offset > 0)
- {
- unsigned reg1 = cfun->machine->frame.wb_candidate1;
- unsigned reg2 = cfun->machine->frame.wb_candidate2;
- bool skip_wb = true;
- rtx cfi_ops = NULL;
-
- if (frame_pointer_needed)
- fp_offset = 0;
- else if (fp_offset
- || reg1 == FIRST_PSEUDO_REGISTER
- || (reg2 == FIRST_PSEUDO_REGISTER
- && offset >= 256))
- skip_wb = false;
-
- aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
- skip_wb, &cfi_ops);
- aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
- skip_wb, &cfi_ops);
+ aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
+ callee_adjust != 0, &cfi_ops);
+ aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
+ callee_adjust != 0, &cfi_ops);
- if (need_barrier_p)
- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+ if (need_barrier_p)
+ emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
- if (skip_wb)
- aarch64_pop_regs (reg1, reg2, offset, &cfi_ops);
- else
- emit_insn (gen_add2_insn (stack_pointer_rtx, GEN_INT (offset)));
+ if (callee_adjust != 0)
+ aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
- /* Reset the CFA to be SP + FRAME_SIZE. */
- rtx new_cfa = stack_pointer_rtx;
- if (frame_size > 0)
- new_cfa = plus_constant (Pmode, new_cfa, frame_size);
- cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
+ if (callee_adjust != 0 || initial_adjust > 65536)
+ {
+ /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
insn = get_last_insn ();
- REG_NOTES (insn) = cfi_ops;
+ rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
+ REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
RTX_FRAME_RELATED_P (insn) = 1;
+ cfi_ops = NULL;
}
- if (frame_size > 0)
- {
- if (need_barrier_p)
- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+ aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, initial_adjust, true);
- aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, frame_size, true);
+ if (cfi_ops)
+ {
+ /* Emit delayed restores and reset the CFA to be SP. */
+ insn = get_last_insn ();
+ cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
+ REG_NOTES (insn) = cfi_ops;
+ RTX_FRAME_RELATED_P (insn) = 1;
}
/* Stack adjustment for exception handler. */
@@ -5203,18 +5176,18 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
if (to == HARD_FRAME_POINTER_REGNUM)
{
if (from == ARG_POINTER_REGNUM)
- return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
+ return cfun->machine->frame.hard_fp_offset;
if (from == FRAME_POINTER_REGNUM)
- return (cfun->machine->frame.hard_fp_offset
- - cfun->machine->frame.saved_varargs_size);
+ return cfun->machine->frame.hard_fp_offset
+ - cfun->machine->frame.locals_offset;
}
if (to == STACK_POINTER_REGNUM)
{
if (from == FRAME_POINTER_REGNUM)
- return (cfun->machine->frame.frame_size
- - cfun->machine->frame.saved_varargs_size);
+ return cfun->machine->frame.frame_size
+ - cfun->machine->frame.locals_offset;
}
return cfun->machine->frame.frame_size;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 9e26eb160eb..19caf9f2979 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -550,11 +550,14 @@ struct GTY (()) aarch64_frame
STACK_BOUNDARY. */
HOST_WIDE_INT saved_varargs_size;
+ /* The size of the saved callee-save int/FP registers. */
+
HOST_WIDE_INT saved_regs_size;
- /* Padding if needed after the all the callee save registers have
- been saved. */
- HOST_WIDE_INT padding0;
- HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */
+
+ /* Offset from the base of the frame (incomming SP) to the
+ top of the locals area. This value is always a multiple of
+ STACK_BOUNDARY. */
+ HOST_WIDE_INT locals_offset;
/* Offset from the base of the frame (incomming SP) to the
hard_frame_pointer. This value is always a multiple of
@@ -564,12 +567,25 @@ struct GTY (()) aarch64_frame
/* The size of the frame. This value is the offset from base of the
* frame (incomming SP) to the stack_pointer. This value is always
* a multiple of STACK_BOUNDARY. */
+ HOST_WIDE_INT frame_size;
+
+ /* The size of the initial stack adjustment before saving callee-saves. */
+ HOST_WIDE_INT initial_adjust;
+
+ /* The writeback value when pushing callee-save registers.
+ It is zero when no push is used. */
+ HOST_WIDE_INT callee_adjust;
+
+ /* The offset from SP to the callee-save registers after initial_adjust.
+ It may be non-zero if no push is used (ie. callee_adjust == 0). */
+ HOST_WIDE_INT callee_offset;
+
+ /* The size of the stack adjustment after saving callee-saves. */
+ HOST_WIDE_INT final_adjust;
unsigned wb_candidate1;
unsigned wb_candidate2;
- HOST_WIDE_INT frame_size;
-
bool laid_out;
};
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
index 70dd6539af9..e23a4a83528 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
@@ -4,8 +4,7 @@
* total frame size > 512.
area except outgoing <= 512
* number of callee-saved reg >= 2.
- * Split stack adjustment into two subtractions.
- the first subtractions could be optimized into "stp !". */
+ * Use a single stack adjustment, no writeback. */
/* { dg-do run } */
/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
@@ -15,6 +14,6 @@
t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10])
t_frame_run (test10)
-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
-/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_12.c b/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
index 2353477c29e..3d7d3594610 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
@@ -13,6 +13,6 @@ t_frame_run (test12)
/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
-/* Check epilogue using write-back. */
-/* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp\\\], \[0-9\]+" 3 } } */
+/* Check epilogue using no write-back. */
+/* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_13.c b/gcc/testsuite/gcc.target/aarch64/test_frame_13.c
index f3aa2639294..74b3370fa46 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_13.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_13.c
@@ -2,8 +2,7 @@
* without outgoing.
* total frame size > 512.
* number of callee-save reg >= 2.
- * split the stack adjustment into two substractions,
- the second could be optimized into "stp !". */
+ * Use a single stack adjustment, no writeback. */
/* { dg-do run } */
/* { dg-options "-O2 --save-temps" } */
@@ -14,4 +13,4 @@ t_frame_pattern (test13, 700, )
t_frame_run (test13)
/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp\\\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_15.c b/gcc/testsuite/gcc.target/aarch64/test_frame_15.c
index fc6f713232d..bed6714b4fe 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_15.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_15.c
@@ -3,8 +3,7 @@
* total frame size > 512.
area except outgoing <= 512
* number of callee-save reg >= 2.
- * split the stack adjustment into two substractions,
- the first could be optimized into "stp !". */
+ * Use a single stack adjustment, no writeback. */
/* { dg-do run } */
/* { dg-options "-O2 --save-temps" } */
@@ -15,4 +14,4 @@ t_frame_pattern_outgoing (test15, 480, , 8, a[8])
t_frame_run (test15)
/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */
+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_16.c b/gcc/testsuite/gcc.target/aarch64/test_frame_16.c
new file mode 100644
index 00000000000..28f3826adad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_16.c
@@ -0,0 +1,25 @@
+/* Verify:
+ * with outgoing.
+ * single int register push.
+ * varargs and callee-save size >= 256
+ * Use 2 stack adjustments. */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
+
+#define REP8(X) X,X,X,X,X,X,X,X
+#define REP64(X) REP8(REP8(X))
+
+void outgoing (__builtin_va_list, ...);
+
+double vararg_outgoing (int x1, ...)
+{
+ double a1 = x1, a2 = x1 * 2, a3 = x1 * 3, a4 = x1 * 4, a5 = x1 * 5, a6 = x1 * 6;
+ __builtin_va_list vl;
+ __builtin_va_start (vl, x1);
+ outgoing (vl, a1, a2, a3, a4, a5, a6, REP64 (1));
+ __builtin_va_end (vl);
+ return a1 + a2 + a3 + a4 + a5 + a6;
+}
+
+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_6.c b/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
index d8481346c58..6a753dff87e 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
@@ -3,8 +3,7 @@
* without outgoing.
* total frame size > 512.
* number of callee-saved reg == 1.
- * split stack adjustment into two subtractions.
- the second subtraction should use "str !". */
+ * use a single stack adjustment, no writeback. */
/* { dg-do run } */
/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
@@ -14,6 +13,7 @@
t_frame_pattern (test6, 700, )
t_frame_run (test6)
-/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
-/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\]," 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
index d87d68b3eec..f2a8713d19d 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
@@ -3,8 +3,7 @@
* without outgoing.
* total frame size > 512.
* number of callee-saved reg == 2.
- * split stack adjustment into two subtractions.
- the second subtraction should use "stp !". */
+ * use a single stack adjustment, no writeback. */
/* { dg-do run } */
/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
@@ -14,6 +13,6 @@
t_frame_pattern (test7, 700, "x19")
t_frame_run (test7)
-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
-/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp]" 1 } } */
+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_8.c b/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
index 435d9d59e68..9b6c6939eb5 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
@@ -12,6 +12,6 @@
t_frame_pattern_outgoing (test8, 700, , 8, a[8])
t_frame_run (test8)
-/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */
-/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */
+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, \[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp, \[0-9\]+\\\]" 1 } } */