author     hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>  2000-04-12 11:22:52 +0000
committer  hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>  2000-04-12 11:22:52 +0000
commit     ae8e37a624da94fa124be2b927e90eb7584ba5c4 (patch)
tree       c2ec02d30e9c730f795ab46e046b1c669c8e1333
parent     f6aa5dd6f0364a23f13b2942c34b0983bc55b1a6 (diff)
* i386.c (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8):
New global variables.
(ix86_emit_epilogue_adjustment): Do not attempt to use pop for the
adjustment.
* i386.h (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8):
Declare.
(TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_ADD_ESP_4,
TARGET_ADD_ESP_8): New macros.
* i386.md: Add peep2s to convert esp adjustments to push and pop
instructions.
(pushsi_prologue, popsi_epilogue): New patterns.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@33100 138bc75d-0d04-0410-961f-82ee72b054a4
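For orientation, the sketch below is not part of the commit; it only illustrates the kind of code the new peep2 patterns aim for. The function name, the chosen registers, and the exact instruction selection are hypothetical and depend on the -mcpu tuning, register allocation, and whether a frame pointer is in use.

/* Illustrative only: a leaf function that needs a small stack slot.  */
int
callee (int x)
{
  volatile int tmp = x;		/* forces a 4-byte local */
  return tmp + 1;
}

/* Possible prologue/epilogue around the body when the adjustment is 4 bytes:

     without the peepholes:       with the peepholes (when optimizing for size,
                                  or on CPUs where add/sub of esp is not preferred):
       subl  $4, %esp               pushl %eax     ; allocate 4 bytes, value unused
       ...                          ...
       addl  $4, %esp               popl  %ecx     ; deallocate 4 bytes into a dead reg

   pushl/popl of a register are one byte each, versus three bytes for addl/subl
   with an 8-bit immediate, so the push/pop form is smaller.  */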
-rw-r--r--  gcc/ChangeLog             14
-rw-r--r--  gcc/config/i386/i386.c    57
-rw-r--r--  gcc/config/i386/i386.h     5
-rw-r--r--  gcc/config/i386/i386.md  152
4 files changed, 184 insertions, 44 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c84f4f2d9d9..6705abd45d9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+Fri Apr 7 12:23:04 MET DST 2000 Jan Hubicka <jh@suse.cz>
+
+ * i386.c (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8):
+ New global variables.
+ (ix86_emit_epilogue_adjustment): Do not attempt to use pop for the
+ adjustment.
+ * i386.h (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8):
+	Declare.
+ (TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_ADD_ESP_4,
+ TARGET_ADD_ESP_8): New macros.
+ * i386.md: Add peep2s to convert esp adjustments to push and pop
+ instructions.
+ (pushsi_prologue, popsi_epilogue): New patterns.
+
2000-04-12 Jakub Jelinek <jakub@redhat.com>
* real.c (toe64): Remove stale #endif from the last change.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 74de03c376a..85a7c6632f0 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -218,6 +218,10 @@ const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
+const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
+const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
+const int x86_add_esp_4 = m_ATHLON | m_K6;
+const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
@@ -1968,51 +1972,16 @@ static void
ix86_emit_epilogue_esp_adjustment (tsize)
int tsize;
{
- /* Intel's docs say that for 4 or 8 bytes of stack frame one should
- use `pop' and not `add'. */
- int use_pop = tsize == 4;
- rtx edx = 0, ecx;
-
- /* Use two pops only for the Pentium processors. */
- if (tsize == 8 && !TARGET_386 && !TARGET_486)
- {
- rtx retval = current_function_return_rtx;
-
- edx = gen_rtx_REG (SImode, 1);
-
- /* This case is a bit more complex. Since we cannot pop into
- %ecx twice we need a second register. But this is only
- available if the return value is not of DImode in which
- case the %edx register is not available. */
- use_pop = (retval == NULL
- || !reg_overlap_mentioned_p (edx, retval));
- }
-
- if (use_pop)
- {
- ecx = gen_rtx_REG (SImode, 2);
-
- /* We have to prevent the two pops here from being scheduled.
- GCC otherwise would try in some situation to put other
- instructions in between them which has a bad effect. */
- emit_insn (gen_blockage ());
- emit_insn (gen_popsi1 (ecx));
- if (tsize == 8)
- emit_insn (gen_popsi1 (edx));
- }
+ /* If a frame pointer is present, we must be sure to tie the sp
+ to the fp so that we don't mis-schedule. */
+ if (frame_pointer_needed)
+ emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (tsize),
+ hard_frame_pointer_rtx));
else
- {
- /* If a frame pointer is present, we must be sure to tie the sp
- to the fp so that we don't mis-schedule. */
- if (frame_pointer_needed)
- emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (tsize),
- hard_frame_pointer_rtx));
- else
- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (tsize)));
- }
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (tsize)));
}
/* Emit code to restore saved registers using MOV insns. First register
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index b235af9539c..50f08251420 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -174,6 +174,7 @@ extern const int x86_read_modify, x86_split_long_moves;
extern const int x86_promote_QImode, x86_single_stringop;
extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
extern const int x86_promote_hi_regs;
+extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
#define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
@@ -201,6 +202,10 @@ extern const int x86_promote_hi_regs;
#define TARGET_HIMODE_MATH (x86_himode_math & CPUMASK)
#define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & CPUMASK)
#define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & CPUMASK)
+#define TARGET_ADD_ESP_4 (x86_add_esp_4 & CPUMASK)
+#define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK)
+#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK)
+#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
#define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
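The TARGET_* macros above follow the existing i386 tuning scheme: each x86_* constant is a bitmask of processor models, and CPUMASK selects the bit for the processor being tuned for. The definitions of CPUMASK and the m_* masks are not part of this diff, so the standalone sketch below uses assumed names, enum order, and a guessed CPUMASK definition purely to show the mechanism.

/* Standalone sketch of the CPU-mask tuning mechanism (assumed names and
   ordering; the real m_* masks and CPUMASK live elsewhere in i386.h).  */
#include <stdio.h>

enum cpu { CPU_386, CPU_486, CPU_PENTIUM, CPU_PPRO, CPU_K6, CPU_ATHLON };

#define m_386    (1 << CPU_386)
#define m_486    (1 << CPU_486)
#define m_PENT   (1 << CPU_PENTIUM)
#define m_PPRO   (1 << CPU_PPRO)
#define m_K6     (1 << CPU_K6)
#define m_ATHLON (1 << CPU_ATHLON)

/* In the compiler this would be set from -mcpu=; assume PPro here.  */
static enum cpu ix86_cpu = CPU_PPRO;
#define CPUMASK (1 << ix86_cpu)

/* Mask values copied from the patch: CPUs on which the plain add/sub of
   esp is preferred, so the push/pop conversion is suppressed there.  */
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_add_esp_4 = m_ATHLON | m_K6;

#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK)
#define TARGET_ADD_ESP_4 (x86_add_esp_4 & CPUMASK)

int
main (void)
{
  /* For PPro tuning: keep "sub $4,%esp" (bit set), but the epilogue
     "add $4,%esp" may be turned into a pop (bit clear).  */
  printf ("TARGET_SUB_ESP_4=%d TARGET_ADD_ESP_4=%d\n",
          TARGET_SUB_ESP_4 != 0, TARGET_ADD_ESP_4 != 0);
  return 0;
}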
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 98c76160414..090d0ebd444 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1307,6 +1307,24 @@
"push{l}\\t%1"
[(set_attr "type" "push")])
+(define_insn "*pushsi2_prologue"
+ [(set (match_operand:SI 0 "push_operand" "=<")
+ (match_operand:SI 1 "general_no_elim_operand" "ri*m"))
+ (set (reg:SI 6) (reg:SI 6))]
+ ""
+ "push{l}\\t%1"
+ [(set_attr "type" "push")])
+
+(define_insn "*popsi1_epilogue"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
+ (mem:SI (reg:SI 7)))
+ (set (reg:SI 7)
+ (plus:SI (reg:SI 7) (const_int 4)))
+ (set (reg:SI 6) (reg:SI 6))]
+ ""
+ "pop{l}\\t%0"
+ [(set_attr "type" "pop")])
+
(define_insn "popsi1"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
(mem:SI (reg:SI 7)))
@@ -9752,6 +9770,140 @@
[(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
(clobber (reg:CC 17))])]
"operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+;; The ESP adjustments can be done by push and pop instructions.  The resulting
+;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
+;; On many CPUs it is also faster, since special hardware to avoid esp
+;; dependencies is present.
+
+;; While some of these conversions may be done using splitters, we use peepholes
+;; in order to allow the combine_stack_adjustments pass to see non-obfuscated RTL.
+
+;; Convert prologue esp subtractions to push.
+;; We need a register to push.  In order to keep verify_flow_info happy we have
+;; two choices
+;; - use a scratch and clobber it in order to avoid dependencies
+;; - use an already live register
+;; We can't use the second way right now, since there is no reliable way to
+;; verify that a given register is live.  The first choice will also most likely
+;; result in fewer dependencies.  At the site of the esp adjustments it is very
+;; likely that call-clobbered registers are dead.  We may later want to use the
+;; base pointer as an alternative when no register is available.
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4)))
+ (set (reg:SI 6) (reg:SI 6))
+ (clobber (reg:CC 17))])]
+ "optimize_size || !TARGET_SUB_ESP_4"
+ [(clobber (match_dup 0))
+ (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
+ (set (reg:SI 6) (reg:SI 6))])])
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+ (set (reg:SI 6) (reg:SI 6))
+ (clobber (reg:CC 17))])]
+ "optimize_size || !TARGET_SUB_ESP_8"
+ [(clobber (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
+ (parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
+ (set (reg:SI 6) (reg:SI 6))])])
+
+;; Convert esp subtractions to push.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4)))
+ (clobber (reg:CC 17))])]
+ "optimize_size || !TARGET_SUB_ESP_4"
+ [(clobber (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))])
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+ (clobber (reg:CC 17))])]
+ "optimize_size || !TARGET_SUB_ESP_8"
+ [(clobber (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))])
+
+;; Convert epilogue deallocator to pop.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
+ (set (reg:SI 6) (reg:SI 6))
+ (clobber (reg:CC 17))])]
+ "optimize_size || !TARGET_ADD_ESP_4"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
+ (set (reg:SI 6) (reg:SI 6))])]
+ "")
+
+;; The two-pop case is tricky, since a pop creates a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (match_scratch:SI 1 "r")
+ (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
+ (set (reg:SI 6) (reg:SI 6))
+ (clobber (reg:CC 17))])]
+ "optimize_size || !TARGET_ADD_ESP_8"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
+ (set (reg:SI 6) (reg:SI 6))])
+ (parallel [(set (match_dup 1) (mem:SI (reg:SI 7)))
+ (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
+ (set (reg:SI 6) (reg:SI 6))
+ (clobber (reg:CC 17))])]
+ "optimize_size"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
+ (set (reg:SI 6) (reg:SI 6))])
+ (parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
+ "")
+
+;; Convert esp additions to pop.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
+ (clobber (reg:CC 17))])]
+ ""
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
+ "")
+
+;; The two-pop case is tricky, since a pop creates a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (match_scratch:SI 1 "r")
+ (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
+ (clobber (reg:CC 17))])]
+ ""
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])
+ (parallel [(set (match_dup 1) (mem:SI (reg:SI 7)))
+ (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
+ (clobber (reg:CC 17))])]
+ "optimize_size"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])
+ (parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
+ (set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
+ "")
;; Call-value patterns last so that the wildcard operand does not
;; disrupt insn-recog's switch tables.
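As a closing illustration (again not part of the patch), the 8-byte epilogue case shows why the peepholes above ask for two scratch registers when they can get them: popping twice into the same register creates a write-after-write dependency, while two different dead registers let the two loads proceed independently. The function name and register choices below are hypothetical.

/* Illustrative only: a function whose epilogue must release 8 bytes.  */
int
sum_pair (int a, int b)
{
  volatile int pair[2] = { a, b };	/* forces an 8-byte local block */
  return pair[0] + pair[1];
}

/* Possible 8-byte epilogue deallocation:

     generic form:      two dead registers:     only one dead register
                                                (size-optimizing fallback):
       addl $8, %esp      popl %ecx               popl %ecx
                          popl %edx               popl %ecx   ; WAW dependency on %ecx

   The two-register form avoids reusing the same destination, which is why the
   corresponding peepholes take a second match_scratch when one is available.  */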