diff options
Diffstat (limited to 'gcc/config/ia64/ia64.c')
-rw-r--r-- | gcc/config/ia64/ia64.c | 731 |
1 files changed, 483 insertions, 248 deletions
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 10f06665999..a01ff182b24 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -1,5 +1,5 @@ /* Definitions of target machine for GNU compiler. - Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. Contributed by James E. Wilson <wilson@cygnus.com> and David Mosberger <davidm@hpl.hp.com>. @@ -45,6 +45,7 @@ Boston, MA 02111-1307, USA. */ #include "target.h" #include "target-def.h" #include "tm_p.h" +#include "langhooks.h" /* This is used for communication between ASM_OUTPUT_LABEL and ASM_OUTPUT_LABELREF. */ @@ -109,6 +110,39 @@ static int ia64_flag_schedule_insns2; sections. */ unsigned int ia64_section_threshold; + +/* Structure to be filled in by ia64_compute_frame_size with register + save masks and offsets for the current function. */ + +struct ia64_frame_info +{ + HOST_WIDE_INT total_size; /* size of the stack frame, not including + the caller's scratch area. */ + HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */ + HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */ + HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */ + HARD_REG_SET mask; /* mask of saved registers. */ + unsigned int gr_used_mask; /* mask of registers in use as gr spill + registers or long-term scratches. */ + int n_spilled; /* number of spilled registers. */ + int reg_fp; /* register for fp. */ + int reg_save_b0; /* save register for b0. */ + int reg_save_pr; /* save register for prs. */ + int reg_save_ar_pfs; /* save register for ar.pfs. */ + int reg_save_ar_unat; /* save register for ar.unat. */ + int reg_save_ar_lc; /* save register for ar.lc. */ + int reg_save_gp; /* save register for gp. */ + int n_input_regs; /* number of input registers used. */ + int n_local_regs; /* number of local registers used. */ + int n_output_regs; /* number of output registers used. */ + int n_rotate_regs; /* number of rotating registers used. */ + + char need_regstk; /* true if a .regstk directive needed. */ + char initialized; /* true if the data is finalized. */ +}; + +/* Current frame information calculated by ia64_compute_frame_size. */ +static struct ia64_frame_info current_frame_info; static rtx gen_tls_get_addr PARAMS ((void)); static rtx gen_thread_pointer PARAMS ((void)); @@ -141,8 +175,9 @@ static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode, tree, rtx)); static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode, tree, rtx)); -static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int, - tree, rtx)); +static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, + enum machine_mode, + int, tree, rtx)); static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode, tree, rtx)); static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx)); @@ -166,13 +201,16 @@ static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT, static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx, unsigned HOST_WIDE_INT)); -static void ia64_aix_select_section PARAMS ((tree, int, - unsigned HOST_WIDE_INT)) +static void ia64_rwreloc_select_section PARAMS ((tree, int, + unsigned HOST_WIDE_INT)) ATTRIBUTE_UNUSED; -static void ia64_aix_unique_section PARAMS ((tree, int)) +static void ia64_rwreloc_unique_section PARAMS ((tree, int)) ATTRIBUTE_UNUSED; -static void ia64_aix_select_rtx_section PARAMS ((enum machine_mode, rtx, - unsigned HOST_WIDE_INT)) +static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx, + unsigned HOST_WIDE_INT)) + ATTRIBUTE_UNUSED; +static unsigned int ia64_rwreloc_section_type_flags + PARAMS ((tree, const char *, int)) ATTRIBUTE_UNUSED; static void ia64_hpux_add_extern_decl PARAMS ((const char *name)) @@ -242,11 +280,6 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_SCHED_REORDER2 #define TARGET_SCHED_REORDER2 ia64_sched_reorder2 -#ifdef HAVE_AS_TLS -#undef TARGET_HAVE_TLS -#define TARGET_HAVE_TLS true -#endif - #undef TARGET_ASM_OUTPUT_MI_THUNK #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK @@ -974,6 +1007,21 @@ ia64_move_ok (dst, src) return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src); } +/* Return 0 if we are doing C++ code. This optimization fails with + C++ because of GNAT c++/6685. */ + +int +addp4_optimize_ok (op1, op2) + rtx op1, op2; +{ + + if (!strcmp (lang_hooks.name, "GNU C++")) + return 0; + + return (basereg_operand (op1, GET_MODE(op1)) != + basereg_operand (op2, GET_MODE(op2))); +} + /* Check if OP is a mask suitible for use with SHIFT in a dep.z instruction. Return the length of the field, or <= 0 on failure. */ @@ -1038,8 +1086,7 @@ ia64_expand_load_address (dest, src, scratch) if (! scratch) scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode); - emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi), - scratch)); + ia64_expand_load_address (subtarget, plus_constant (sym, hi), scratch); emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo))); } else @@ -1102,6 +1149,7 @@ ia64_expand_move (op0, op1) if ((tls_kind = tls_symbolic_operand (op1, Pmode))) { rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns; + rtx orig_op0 = op0; switch (tls_kind) { @@ -1125,8 +1173,10 @@ ia64_expand_move (op0, op1) insns = get_insns (); end_sequence (); + if (GET_MODE (op0) != Pmode) + op0 = tga_ret; emit_libcall_block (insns, op0, tga_ret, op1); - return NULL_RTX; + break; case TLS_MODEL_LOCAL_DYNAMIC: /* ??? This isn't the completely proper way to do local-dynamic @@ -1154,20 +1204,15 @@ ia64_expand_move (op0, op1) tmp = gen_reg_rtx (Pmode); emit_libcall_block (insns, tmp, tga_ret, tga_eqv); - if (register_operand (op0, Pmode)) - tga_ret = op0; - else - tga_ret = gen_reg_rtx (Pmode); + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); if (TARGET_TLS64) { - emit_insn (gen_load_dtprel (tga_ret, op1)); - emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret)); + emit_insn (gen_load_dtprel (op0, op1)); + emit_insn (gen_adddi3 (op0, tmp, op0)); } else - emit_insn (gen_add_dtprel (tga_ret, tmp, op1)); - if (tga_ret == op0) - return NULL_RTX; - op1 = tga_ret; + emit_insn (gen_add_dtprel (op0, tmp, op1)); break; case TLS_MODEL_INITIAL_EXEC: @@ -1177,35 +1222,32 @@ ia64_expand_move (op0, op1) RTX_UNCHANGING_P (tmp) = 1; tmp = force_reg (Pmode, tmp); - if (register_operand (op0, Pmode)) - op1 = op0; - else - op1 = gen_reg_rtx (Pmode); - emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ())); - if (op1 == op0) - return NULL_RTX; + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); + emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ())); break; case TLS_MODEL_LOCAL_EXEC: - if (register_operand (op0, Pmode)) - tmp = op0; - else - tmp = gen_reg_rtx (Pmode); + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); if (TARGET_TLS64) { - emit_insn (gen_load_tprel (tmp, op1)); - emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp)); + emit_insn (gen_load_tprel (op0, op1)); + emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0)); } else - emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1)); - if (tmp == op0) - return NULL_RTX; - op1 = tmp; + emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1)); break; default: abort (); } + + if (orig_op0 == op0) + return NULL_RTX; + if (GET_MODE (orig_op0) == Pmode) + return op0; + return gen_lowpart (GET_MODE (orig_op0), op0); } else if (!TARGET_NO_PIC && (symbolic_operand (op1, Pmode) || @@ -1246,46 +1288,6 @@ ia64_expand_move (op0, op1) return op1; } -rtx -ia64_gp_save_reg (setjmp_p) - int setjmp_p; -{ - rtx save = cfun->machine->ia64_gp_save; - - if (save != NULL) - { - /* We can't save GP in a pseudo if we are calling setjmp, because - pseudos won't be restored by longjmp. For now, we save it in r4. */ - /* ??? It would be more efficient to save this directly into a stack - slot. Unfortunately, the stack slot address gets cse'd across - the setjmp call because the NOTE_INSN_SETJMP note is in the wrong - place. */ - - /* ??? Get the barf bag, Virginia. We've got to replace this thing - in place, since this rtx is used in exception handling receivers. - Moreover, we must get this rtx out of regno_reg_rtx or reload - will do the wrong thing. */ - unsigned int old_regno = REGNO (save); - if (setjmp_p && old_regno != GR_REG (4)) - { - REGNO (save) = GR_REG (4); - regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno); - } - } - else - { - if (setjmp_p) - save = gen_rtx_REG (DImode, GR_REG (4)); - else if (! optimize) - save = gen_rtx_REG (DImode, LOC_REG (0)); - else - save = gen_reg_rtx (DImode); - cfun->machine->ia64_gp_save = save; - } - - return save; -} - /* Split a post-reload TImode reference into two DImode components. */ rtx @@ -1371,12 +1373,12 @@ spill_tfmode_operand (in, force) && GET_MODE (SUBREG_REG (in)) == TImode && GET_CODE (SUBREG_REG (in)) == REG) { - rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE); + rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true); return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0))); } else if (force && GET_CODE (in) == REG) { - rtx mem = gen_mem_addressof (in, NULL_TREE); + rtx mem = gen_mem_addressof (in, NULL_TREE, true); return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0))); } else if (GET_CODE (in) == MEM @@ -1418,72 +1420,148 @@ ia64_expand_compare (code, mode) } /* Emit the appropriate sequence for a call. */ - void ia64_expand_call (retval, addr, nextarg, sibcall_p) rtx retval; rtx addr; - rtx nextarg; + rtx nextarg ATTRIBUTE_UNUSED; int sibcall_p; { - rtx insn, b0, pfs, gp_save, narg_rtx, dest; - bool indirect_p; - int narg; + rtx insn, b0; addr = XEXP (addr, 0); + addr = convert_memory_address (DImode, addr); b0 = gen_rtx_REG (DImode, R_BR (0)); - pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM); - - if (! nextarg) - narg = 0; - else if (IN_REGNO_P (REGNO (nextarg))) - narg = REGNO (nextarg) - IN_REG (0); - else - narg = REGNO (nextarg) - OUT_REG (0); - narg_rtx = GEN_INT (narg); + /* ??? Should do this for functions known to bind local too. */ if (TARGET_NO_PIC || TARGET_AUTO_PIC) { if (sibcall_p) - insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs); + insn = gen_sibcall_nogp (addr); else if (! retval) - insn = gen_call_nopic (addr, narg_rtx, b0); + insn = gen_call_nogp (addr, b0); else - insn = gen_call_value_nopic (retval, addr, narg_rtx, b0); - emit_call_insn (insn); - return; + insn = gen_call_value_nogp (retval, addr, b0); + insn = emit_call_insn (insn); + } + else + { + if (sibcall_p) + insn = gen_sibcall_gp (addr); + else if (! retval) + insn = gen_call_gp (addr, b0); + else + insn = gen_call_value_gp (retval, addr, b0); + insn = emit_call_insn (insn); + + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); } - indirect_p = ! symbolic_operand (addr, VOIDmode); + if (sibcall_p) + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0); +} +void +ia64_reload_gp () +{ + rtx tmp; - if (sibcall_p || (TARGET_CONST_GP && !indirect_p)) - gp_save = NULL_RTX; + if (current_frame_info.reg_save_gp) + tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp); else - gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode)); + { + HOST_WIDE_INT offset; - if (gp_save) - emit_move_insn (gp_save, pic_offset_table_rtx); + offset = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size); + if (frame_pointer_needed) + { + tmp = hard_frame_pointer_rtx; + offset = -offset; + } + else + { + tmp = stack_pointer_rtx; + offset = current_frame_info.total_size - offset; + } - /* If this is an indirect call, then we have the address of a descriptor. */ - if (indirect_p) - { - dest = force_reg (DImode, gen_rtx_MEM (DImode, addr)); - emit_move_insn (pic_offset_table_rtx, - gen_rtx_MEM (DImode, plus_constant (addr, 8))); + if (CONST_OK_FOR_I (offset)) + emit_insn (gen_adddi3 (pic_offset_table_rtx, + tmp, GEN_INT (offset))); + else + { + emit_move_insn (pic_offset_table_rtx, GEN_INT (offset)); + emit_insn (gen_adddi3 (pic_offset_table_rtx, + pic_offset_table_rtx, tmp)); + } + + tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx); + } + + emit_move_insn (pic_offset_table_rtx, tmp); +} + +void +ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b, + noreturn_p, sibcall_p) + rtx retval, addr, retaddr, scratch_r, scratch_b; + int noreturn_p, sibcall_p; +{ + rtx insn; + bool is_desc = false; + + /* If we find we're calling through a register, then we're actually + calling through a descriptor, so load up the values. */ + if (REG_P (addr) && GR_REGNO_P (REGNO (addr))) + { + rtx tmp; + bool addr_dead_p; + + /* ??? We are currently constrained to *not* use peep2, because + we can legitimiately change the global lifetime of the GP + (in the form of killing where previously live). This is + because a call through a descriptor doesn't use the previous + value of the GP, while a direct call does, and we do not + commit to either form until the split here. + + That said, this means that we lack precise life info for + whether ADDR is dead after this call. This is not terribly + important, since we can fix things up essentially for free + with the POST_DEC below, but it's nice to not use it when we + can immediately tell it's not necessary. */ + addr_dead_p = ((noreturn_p || sibcall_p + || TEST_HARD_REG_BIT (regs_invalidated_by_call, + REGNO (addr))) + && !FUNCTION_ARG_REGNO_P (REGNO (addr))); + + /* Load the code address into scratch_b. */ + tmp = gen_rtx_POST_INC (Pmode, addr); + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (scratch_r, tmp); + emit_move_insn (scratch_b, scratch_r); + + /* Load the GP address. If ADDR is not dead here, then we must + revert the change made above via the POST_INCREMENT. */ + if (!addr_dead_p) + tmp = gen_rtx_POST_DEC (Pmode, addr); + else + tmp = addr; + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (pic_offset_table_rtx, tmp); + + is_desc = true; + addr = scratch_b; } - else - dest = addr; if (sibcall_p) - insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs); - else if (! retval) - insn = gen_call_pic (dest, narg_rtx, b0); + insn = gen_sibcall_nogp (addr); + else if (retval) + insn = gen_call_value_nogp (retval, addr, retaddr); else - insn = gen_call_value_pic (retval, dest, narg_rtx, b0); + insn = gen_call_nogp (addr, retaddr); emit_call_insn (insn); - if (gp_save) - emit_move_insn (pic_offset_table_rtx, gp_save); + if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p) + ia64_reload_gp (); } /* Begin the assembly file. */ @@ -1522,39 +1600,6 @@ emit_safe_across_calls (f) fputc ('\n', f); } - -/* Structure to be filled in by ia64_compute_frame_size with register - save masks and offsets for the current function. */ - -struct ia64_frame_info -{ - HOST_WIDE_INT total_size; /* size of the stack frame, not including - the caller's scratch area. */ - HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */ - HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */ - HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */ - HARD_REG_SET mask; /* mask of saved registers. */ - unsigned int gr_used_mask; /* mask of registers in use as gr spill - registers or long-term scratches. */ - int n_spilled; /* number of spilled registers. */ - int reg_fp; /* register for fp. */ - int reg_save_b0; /* save register for b0. */ - int reg_save_pr; /* save register for prs. */ - int reg_save_ar_pfs; /* save register for ar.pfs. */ - int reg_save_ar_unat; /* save register for ar.unat. */ - int reg_save_ar_lc; /* save register for ar.lc. */ - int n_input_regs; /* number of input registers used. */ - int n_local_regs; /* number of local registers used. */ - int n_output_regs; /* number of output registers used. */ - int n_rotate_regs; /* number of rotating registers used. */ - - char need_regstk; /* true if a .regstk directive needed. */ - char initialized; /* true if the data is finalized. */ -}; - -/* Current frame information calculated by ia64_compute_frame_size. */ -static struct ia64_frame_info current_frame_info; - /* Helper function for ia64_compute_frame_size: find an appropriate general register to spill some special register to. SPECIAL_SPILL_MASK contains bits in GR0 to GR31 that have already been allocated by this routine. @@ -1796,6 +1841,17 @@ ia64_compute_frame_size (size) extra_spill_size += 8; n_spilled += 1; } + + /* Similarly for gp. Note that if we're calling setjmp, the stacked + registers are clobbered, so we fall back to the stack. */ + current_frame_info.reg_save_gp + = (current_function_calls_setjmp ? 0 : find_gr_spill (1)); + if (current_frame_info.reg_save_gp == 0) + { + SET_HARD_REG_BIT (mask, GR_REG (1)); + spill_size += 8; + n_spilled += 1; + } } else { @@ -1805,6 +1861,17 @@ ia64_compute_frame_size (size) spill_size += 8; n_spilled += 1; } + + if (regs_ever_live[AR_PFS_REGNUM]) + { + SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); + current_frame_info.reg_save_ar_pfs = find_gr_spill (1); + if (current_frame_info.reg_save_ar_pfs == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + } } /* Unwind descriptor hackery: things are most efficient if we allocate @@ -1843,8 +1910,10 @@ ia64_compute_frame_size (size) } /* If we're forced to use st8.spill, we're forced to save and restore - ar.unat as well. */ - if (spilled_gr_p || cfun->machine->n_varargs) + ar.unat as well. The check for existing liveness allows inline asm + to touch ar.unat. */ + if (spilled_gr_p || cfun->machine->n_varargs + || regs_ever_live[AR_UNAT_REGNUM]) { regs_ever_live[AR_UNAT_REGNUM] = 1; SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM); @@ -1937,10 +2006,6 @@ ia64_initial_elimination_offset (from, to) abort (); break; - case RETURN_ADDRESS_POINTER_REGNUM: - offset = 0; - break; - default: abort (); } @@ -2291,21 +2356,11 @@ ia64_expand_prologue () reg_names[current_frame_info.reg_fp] = tmp; } - /* Fix up the return address placeholder. */ - /* ??? We can fail if __builtin_return_address is used, and we didn't - allocate a register in which to save b0. I can't think of a way to - eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and - then be sure that I got the right one. Further, reload doesn't seem - to care if an eliminable register isn't used, and "eliminates" it - anyway. */ - if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM] - && current_frame_info.reg_save_b0 != 0) - XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0; - /* We don't need an alloc instruction if we've used no outputs or locals. */ if (current_frame_info.n_local_regs == 0 && current_frame_info.n_output_regs == 0 - && current_frame_info.n_input_regs <= current_function_args_info.int_regs) + && current_frame_info.n_input_regs <= current_function_args_info.int_regs + && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) { /* If there is no alloc, but there are input registers used, then we need a .regstk directive. */ @@ -2467,8 +2522,8 @@ ia64_expand_prologue () /* The alloc insn already copied ar.pfs into a general register. The only thing we have to do now is copy that register to a stack slot if we'd not allocated a local register for the job. */ - if (current_frame_info.reg_save_ar_pfs == 0 - && ! current_function_is_leaf) + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM) + && current_frame_info.reg_save_ar_pfs == 0) { reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg); @@ -2499,6 +2554,19 @@ ia64_expand_prologue () } } + if (current_frame_info.reg_save_gp) + { + insn = emit_move_insn (gen_rtx_REG (DImode, + current_frame_info.reg_save_gp), + pic_offset_table_rtx); + /* We don't know for sure yet if this is actually needed, since + we've not split the PIC call patterns. If all of the calls + are indirect, and not followed by any uses of the gp, then + this save is dead. Allow it to go away. */ + REG_NOTES (insn) + = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn)); + } + /* We should now be at the base of the gr/br/fr spill area. */ if (cfa_off != (current_frame_info.spill_cfa_off + current_frame_info.spill_size)) @@ -2650,7 +2718,7 @@ ia64_expand_epilogue (sibcall_p) reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); emit_move_insn (reg, alt_reg); } - else if (! current_function_is_leaf) + else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) { alt_regno = next_scratch_gr_reg (); alt_reg = gen_rtx_REG (DImode, alt_regno); @@ -2680,8 +2748,13 @@ ia64_expand_epilogue (sibcall_p) + current_frame_info.spill_size)) abort (); + /* The GP may be stored on the stack in the prologue, but it's + never restored in the epilogue. Skip the stack slot. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1))) + cfa_off -= 8; + /* Restore all general registers. */ - for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) + for (regno = GR_REG (2); regno <= GR_REG (31); ++regno) if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) { reg = gen_rtx_REG (DImode, regno); @@ -2811,10 +2884,13 @@ ia64_expand_epilogue (sibcall_p) preserve those input registers used as arguments to the sibling call. It is unclear how to compute that number here. */ if (current_frame_info.n_input_regs != 0) - emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), - GEN_INT (0), GEN_INT (0), - GEN_INT (current_frame_info.n_input_regs), - GEN_INT (0))); + { + rtx n_inputs = GEN_INT (current_frame_info.n_input_regs); + insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), + const0_rtx, const0_rtx, + n_inputs, const0_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + } } } @@ -2839,6 +2915,72 @@ ia64_direct_return () return 0; } +/* Return the magic cookie that we use to hold the return address + during early compilation. */ + +rtx +ia64_return_addr_rtx (count, frame) + HOST_WIDE_INT count; + rtx frame ATTRIBUTE_UNUSED; +{ + if (count != 0) + return NULL; + return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR); +} + +/* Split this value after reload, now that we know where the return + address is saved. */ + +void +ia64_split_return_addr_rtx (dest) + rtx dest; +{ + rtx src; + + if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) + { + if (current_frame_info.reg_save_b0 != 0) + src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); + else + { + HOST_WIDE_INT off; + unsigned int regno; + + /* Compute offset from CFA for BR0. */ + /* ??? Must be kept in sync with ia64_expand_prologue. */ + off = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size); + for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + off -= 8; + + /* Convert CFA offset to a register based offset. */ + if (frame_pointer_needed) + src = hard_frame_pointer_rtx; + else + { + src = stack_pointer_rtx; + off += current_frame_info.total_size; + } + + /* Load address into scratch register. */ + if (CONST_OK_FOR_I (off)) + emit_insn (gen_adddi3 (dest, src, GEN_INT (off))); + else + { + emit_move_insn (dest, GEN_INT (off)); + emit_insn (gen_adddi3 (dest, src, dest)); + } + + src = gen_rtx_MEM (Pmode, dest); + } + } + else + src = gen_rtx_REG (DImode, BR_REG (0)); + + emit_move_insn (dest, src); +} + int ia64_hard_regno_rename_ok (from, to) int from; @@ -2869,10 +3011,6 @@ ia64_hard_regno_rename_ok (from, to) if (PR_REGNO_P (from) && PR_REGNO_P (to)) return (from & 1) == (to & 1); - /* Reg 4 contains the saved gp; we can't reliably rename this. */ - if (from == GR_REG (4) && current_function_calls_setjmp) - return 0; - return 1; } @@ -2886,15 +3024,16 @@ ia64_assemble_integer (x, size, aligned_p) int aligned_p; { if (size == (TARGET_ILP32 ? 4 : 8) - && aligned_p && !(TARGET_NO_PIC || TARGET_AUTO_PIC) && GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_FLAG (x)) { - if (TARGET_ILP32) - fputs ("\tdata4\t@fptr(", asm_out_file); - else - fputs ("\tdata8\t@fptr(", asm_out_file); + static const char * const directive[2][2] = { + /* 64-bit pointer */ /* 32-bit pointer */ + { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */ + { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */ + }; + fputs (directive[aligned_p != 0][TARGET_ILP32 != 0], asm_out_file); output_addr_const (asm_out_file, x); fputs (")\n", asm_out_file); return true; @@ -2992,9 +3131,6 @@ ia64_output_function_epilogue (file, size) { int i; - /* Reset from the function's potential modifications. */ - XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM; - if (current_frame_info.reg_fp) { const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; @@ -3297,9 +3433,10 @@ ia64_function_arg (cum, mode, type, named, incoming) ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD; } - /* If we ended up using just one location, just return that one loc. */ + /* If we ended up using just one location, just return that one loc, but + change the mode back to the argument mode. */ if (i == 1) - return XEXP (loc[0], 0); + return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0))); else return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); } @@ -3514,8 +3651,12 @@ ia64_va_arg (valist, type) /* Variable sized types are passed by reference. */ if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) { - rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type)); - return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr)); + rtx addr = force_reg (ptr_mode, + std_expand_builtin_va_arg (valist, build_pointer_type (type))); +#ifdef POINTERS_EXTEND_UNSIGNED + addr = convert_memory_address (Pmode, addr); +#endif + return gen_rtx_MEM (ptr_mode, addr); } /* Arguments with alignment larger than 8 bytes start at the next even @@ -4082,7 +4223,6 @@ ia64_secondary_reload_class (class, mode, x) return NO_REGS; } - /* Emit text to declare externally defined variables and functions, because the Intel assembler does not support undefined externals. */ @@ -5621,7 +5761,9 @@ ia64_adjust_cost (insn, link, dep_insn, cost) addr = XVECEXP (addr, 0, 0); while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND) addr = XEXP (addr, 0); - if (GET_CODE (addr) == MEM) + + /* Note that LO_SUM is used for GOT loads. */ + if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM) addr = XEXP (addr, 0); else addr = 0; @@ -6966,7 +7108,8 @@ ia64_emit_nops () { while (bundle_pos < 3) { - emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); + if (b->t[bundle_pos] != TYPE_L) + emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); bundle_pos++; } continue; @@ -7035,11 +7178,12 @@ ia64_reorg (insns) insn = get_last_insn (); if (! INSN_P (insn)) insn = prev_active_insn (insn); - if (GET_CODE (insn) == INSN - && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE - && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) - { - saw_stop = 1; + /* Skip over insns that expand to nothing. */ + while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES) + { + if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE + && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) + saw_stop = 1; insn = prev_active_insn (insn); } if (GET_CODE (insn) == CALL_INSN) @@ -7064,12 +7208,11 @@ ia64_epilogue_uses (regno) switch (regno) { case R_GR (1): - /* When a function makes a call through a function descriptor, we - will write a (potentially) new value to "gp". After returning - from such a call, we need to make sure the function restores the - original gp-value, even if the function itself does not use the - gp anymore. */ - return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC)); + /* With a call to a function in another module, we will write a new + value to "gp". After returning from such a call, we need to make + sure the function restores the original gp-value, even if the + function itself does not use the gp anymore. */ + return !(TARGET_AUTO_PIC || TARGET_NO_PIC); case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3): case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7): @@ -7149,6 +7292,10 @@ ia64_in_small_data_p (exp) if (TARGET_NO_SDATA) return false; + /* Functions are never small data. */ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) { const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); @@ -7213,7 +7360,8 @@ ia64_encode_section_info (decl, first) if (encoding == symbol_str[1]) return; /* ??? Sdata became thread or thread becaome not thread. Lose. */ - abort (); + if (encoding == 's' || symbol_str[1] == 's') + abort (); } len = strlen (symbol_str); @@ -7243,6 +7391,24 @@ ia64_strip_name_encoding (str) str++; return str; } + +/* True if it is OK to do sibling call optimization for the specified + call expression EXP. DECL will be the called function, or NULL if + this is an indirect call. */ +bool +ia64_function_ok_for_sibcall (decl) + tree decl; +{ + /* We can't perform a sibcall if the current function has the syscall_linkage + attribute. */ + if (lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + return false; + + /* We must always return with our current GP. This means we can + only sibcall to functions defined in the current module. */ + return decl && (*targetm.binds_local_p) (decl); +} /* Output assembly directives for prologue regions. */ @@ -7290,13 +7456,24 @@ process_set (asm_out_file, pat) { dest_regno = REGNO (dest); - /* If this isn't the final destination for ar.pfs, the alloc - shouldn't have been marked frame related. */ - if (dest_regno != current_frame_info.reg_save_ar_pfs) - abort (); - - fprintf (asm_out_file, "\t.save ar.pfs, r%d\n", - ia64_dbx_register_number (dest_regno)); + /* If this is the final destination for ar.pfs, then this must + be the alloc in the prologue. */ + if (dest_regno == current_frame_info.reg_save_ar_pfs) + fprintf (asm_out_file, "\t.save ar.pfs, r%d\n", + ia64_dbx_register_number (dest_regno)); + else + { + /* This must be an alloc before a sibcall. We must drop the + old frame info. The easiest way to drop the old frame + info is to ensure we had a ".restore sp" directive + followed by a new prologue. If the procedure doesn't + have a memory-stack frame, we'll issue a dummy ".restore + sp" now. */ + if (current_frame_info.total_size == 0 && !frame_pointer_needed) + /* if haven't done process_epilogue() yet, do it now */ + process_epilogue (); + fprintf (asm_out_file, "\t.prologue\n"); + } return 1; } @@ -7561,11 +7738,16 @@ ia64_init_builtins () psi_type_node, integer_type_node, integer_type_node, NULL_TREE); - /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */ + /* __sync_val_compare_and_swap_di */ tree di_ftype_pdi_di_di = build_function_type_list (long_integer_type_node, pdi_type_node, long_integer_type_node, long_integer_type_node, NULL_TREE); + /* __sync_bool_compare_and_swap_di */ + tree si_ftype_pdi_di_di + = build_function_type_list (integer_type_node, + pdi_type_node, long_integer_type_node, + long_integer_type_node, NULL_TREE); /* __sync_synchronize */ tree void_ftype_void = build_function_type (void_type_node, void_list_node); @@ -7598,7 +7780,7 @@ ia64_init_builtins () IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI); def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI); - def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di, + def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI); def_builtin ("__sync_synchronize", void_ftype_void, @@ -7729,13 +7911,14 @@ ia64_expand_fetch_and_op (binoptab, mode, arglist, target) } tmp = gen_reg_rtx (mode); - ccv = gen_rtx_REG (mode, AR_CCV_REGNUM); + /* ar.ccv must always be loaded with a zero-extended DImode value. */ + ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); emit_move_insn (tmp, mem); label = gen_label_rtx (); emit_label (label); emit_move_insn (ret, tmp); - emit_move_insn (ccv, tmp); + convert_move (ccv, tmp, /*unsignedp=*/1); /* Perform the specific operation. Special case NAND by noticing one_cmpl_optab instead. */ @@ -7764,7 +7947,7 @@ ia64_expand_fetch_and_op (binoptab, mode, arglist, target) do { old = tmp; ar.ccv = tmp; - ret = tmp + value; + ret = tmp <op> value; cmpxchgsz.acq tmp = [ptr], ret } while (tmp != old) */ @@ -7798,14 +7981,15 @@ ia64_expand_op_and_fetch (binoptab, mode, arglist, target) emit_insn (gen_mf ()); tmp = gen_reg_rtx (mode); old = gen_reg_rtx (mode); - ccv = gen_rtx_REG (mode, AR_CCV_REGNUM); + /* ar.ccv must always be loaded with a zero-extended DImode value. */ + ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); emit_move_insn (tmp, mem); label = gen_label_rtx (); emit_label (label); emit_move_insn (old, tmp); - emit_move_insn (ccv, tmp); + convert_move (ccv, tmp, /*unsignedp=*/1); /* Perform the specific operation. Special case NAND by noticing one_cmpl_optab instead. */ @@ -7838,7 +8022,8 @@ ia64_expand_op_and_fetch (binoptab, mode, arglist, target) */ static rtx -ia64_expand_compare_and_swap (mode, boolp, arglist, target) +ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target) + enum machine_mode rmode; enum machine_mode mode; int boolp; tree arglist; @@ -7857,6 +8042,11 @@ ia64_expand_compare_and_swap (mode, boolp, arglist, target) mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem)); MEM_VOLATILE_P (mem) = 1; + if (GET_MODE (old) != mode) + old = convert_to_mode (mode, old, /*unsignedp=*/1); + if (GET_MODE (new) != mode) + new = convert_to_mode (mode, new, /*unsignedp=*/1); + if (! register_operand (old, mode)) old = copy_to_mode_reg (mode, old); if (! register_operand (new, mode)) @@ -7867,8 +8057,8 @@ ia64_expand_compare_and_swap (mode, boolp, arglist, target) else tmp = gen_reg_rtx (mode); - ccv = gen_rtx_REG (mode, AR_CCV_REGNUM); - emit_move_insn (ccv, old); + ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); + convert_move (ccv, old, /*unsignedp=*/1); emit_insn (gen_mf ()); if (mode == SImode) insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv); @@ -7879,7 +8069,7 @@ ia64_expand_compare_and_swap (mode, boolp, arglist, target) if (boolp) { if (! target) - target = gen_reg_rtx (mode); + target = gen_reg_rtx (rmode); return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1); } else @@ -7954,11 +8144,16 @@ ia64_expand_builtin (exp, target, subtarget, mode, ignore) tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); unsigned int fcode = DECL_FUNCTION_CODE (fndecl); tree arglist = TREE_OPERAND (exp, 1); + enum machine_mode rmode = VOIDmode; switch (fcode) { case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI: case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI: + mode = SImode; + rmode = SImode; + break; + case IA64_BUILTIN_LOCK_TEST_AND_SET_SI: case IA64_BUILTIN_LOCK_RELEASE_SI: case IA64_BUILTIN_FETCH_AND_ADD_SI: @@ -7977,7 +8172,15 @@ ia64_expand_builtin (exp, target, subtarget, mode, ignore) break; case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI: + mode = DImode; + rmode = SImode; + break; + case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI: + mode = DImode; + rmode = DImode; + break; + case IA64_BUILTIN_LOCK_TEST_AND_SET_DI: case IA64_BUILTIN_LOCK_RELEASE_DI: case IA64_BUILTIN_FETCH_AND_ADD_DI: @@ -8003,11 +8206,13 @@ ia64_expand_builtin (exp, target, subtarget, mode, ignore) { case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI: case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI: - return ia64_expand_compare_and_swap (mode, 1, arglist, target); + return ia64_expand_compare_and_swap (rmode, mode, 1, arglist, + target); case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI: case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI: - return ia64_expand_compare_and_swap (mode, 0, arglist, target); + return ia64_expand_compare_and_swap (rmode, mode, 0, arglist, + target); case IA64_BUILTIN_SYNCHRONIZE: emit_insn (gen_mf ()); @@ -8180,34 +8385,28 @@ ia64_select_rtx_section (mode, x, align) default_elf_select_rtx_section (mode, x, align); } -/* It is illegal to have relocations in shared segments on AIX. +/* It is illegal to have relocations in shared segments on AIX and HPUX. Pretend flag_pic is always set. */ static void -ia64_aix_select_section (exp, reloc, align) +ia64_rwreloc_select_section (exp, reloc, align) tree exp; int reloc; unsigned HOST_WIDE_INT align; { - int save_pic = flag_pic; - flag_pic = 1; - default_elf_select_section (exp, reloc, align); - flag_pic = save_pic; + default_elf_select_section_1 (exp, reloc, align, true); } static void -ia64_aix_unique_section (decl, reloc) +ia64_rwreloc_unique_section (decl, reloc) tree decl; int reloc; { - int save_pic = flag_pic; - flag_pic = 1; - default_unique_section (decl, reloc); - flag_pic = save_pic; + default_unique_section_1 (decl, reloc, true); } static void -ia64_aix_select_rtx_section (mode, x, align) +ia64_rwreloc_select_rtx_section (mode, x, align) enum machine_mode mode; rtx x; unsigned HOST_WIDE_INT align; @@ -8218,6 +8417,16 @@ ia64_aix_select_rtx_section (mode, x, align) flag_pic = save_pic; } +static unsigned int +ia64_rwreloc_section_type_flags (decl, name, reloc) + tree decl; + const char *name; + int reloc; +{ + return default_section_type_flags_1 (decl, name, reloc, true); +} + + /* Output the assembler code for a thunk function. THUNK_DECL is the declaration for the thunk function itself, FUNCTION is the decl for the target function. DELTA is an immediate constant offset to be @@ -8234,6 +8443,9 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function) { rtx this, insn, funexp; + reload_completed = 1; + no_new_pseudos = 1; + /* Set things up as ia64_expand_prologue might. */ last_scratch_gr_reg = 15; @@ -8249,6 +8461,9 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function) emit_note (NULL, NOTE_INSN_PROLOGUE_END); this = gen_rtx_REG (Pmode, IN_REG (0)); + if (TARGET_ILP32) + emit_insn (gen_ptr_extend (this, + gen_rtx_REG (ptr_mode, IN_REG (0)))); /* Apply the constant offset, if required. */ if (delta) @@ -8270,7 +8485,14 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function) rtx vcall_offset_rtx = GEN_INT (vcall_offset); rtx tmp = gen_rtx_REG (Pmode, 2); - emit_move_insn (tmp, gen_rtx_MEM (Pmode, this)); + if (TARGET_ILP32) + { + rtx t = gen_rtx_REG (ptr_mode, 2); + emit_move_insn (t, gen_rtx_MEM (ptr_mode, this)); + emit_insn (gen_ptr_extend (tmp, t)); + } + else + emit_move_insn (tmp, gen_rtx_MEM (Pmode, this)); if (!CONST_OK_FOR_J (vcall_offset)) { @@ -8280,7 +8502,11 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function) } emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx)); - emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); + if (TARGET_ILP32) + emit_move_insn (gen_rtx_REG (ptr_mode, 2), + gen_rtx_MEM (ptr_mode, tmp)); + else + emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); emit_insn (gen_adddi3 (this, this, tmp)); } @@ -8296,18 +8522,27 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function) ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1); insn = get_last_insn (); SIBLING_CALL_P (insn) = 1; + + /* Code generation for calls relies on splitting. */ + reload_completed = 1; + try_split (PATTERN (insn), insn, 0); + emit_barrier (); /* Run just enough of rest_of_compilation to get the insns emitted. There's not really enough bulk here to make other passes such as instruction scheduling worth while. Note that use_thunk calls assemble_start_function and assemble_end_function. */ + insn = get_insns (); emit_all_insn_group_barriers (NULL, insn); shorten_branches (insn); final_start_function (insn, file, 1); final (insn, file, 1, 0); final_end_function (); + + reload_completed = 0; + no_new_pseudos = 0; } #include "gt-ia64.h" |