diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2011-06-08 13:46:43 +0000 |
---|---|---|
committer | H.J. Lu <hongjiu.lu@intel.com> | 2011-06-08 13:46:43 +0000 |
commit | cb1595572388ef311552eaf9c8bde0dffb5b2f7b (patch) | |
tree | 6aef08c655da81f56d3cf4e5fdac5a1558777f33 | |
parent | 272301f109b3c835baa51ab80d26ee903eb60979 (diff) | |
parent | f750989f9cd1f671c47d7cc804a96f2db4baf537 (diff) |
Merged with 4.5 branch at revision 174794. (branch: ix86/gcc-4_5-branch)
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/ix86/gcc-4_5-branch@174808 138bc75d-0d04-0410-961f-82ee72b054a4
32 files changed, 794 insertions, 206 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c23602c9544..a6047dab365 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,98 @@ +2011-06-08 Kaz Kojima <kkojima@gcc.gnu.org> + + Backport from mainline + 2011-06-01 Kaz Kojima <kkojima@gcc.gnu.org> + + PR target/49238 + * config/sh/sh.c (expand_cbranchdi4): Use a scratch register if + needed when original operands are used for msw_skip comparison. + +2011-06-05 Eric Botcazou <ebotcazou@adacore.com> + + * config/sparc/sparc.c (output_return): Fix thinko in the output of an + EH return when delayed branches are disabled. + +2011-06-05 Kaz Kojima <kkojima@gcc.gnu.org> + + Backport from mainline + 2011-05-30 Kaz Kojima <kkojima@gcc.gnu.org> + + PR target/49186 + * config/sh/sh.c (expand_cbranchdi4): Set msw_skip when the high + part of the second operand is 0. + +2011-06-04 Ira Rosen <ira.rosen@linaro.org> + + PR tree-optimization/49038 + * tree-vect-loop-manip.c (vect_generate_tmps_on_preheader): + Ensure at least one epilogue iteration if required by data + accesses with gaps. + * tree-vectorizer.h (struct _loop_vec_info): Add new field + to mark loops that require peeling for gaps. + * tree-vect-loop.c (new_loop_vec_info): Initialize new field. + (vect_estimate_min_profitable_iters): Take peeling for gaps into + account. + (vect_transform_loop): Generate epilogue if required by data + access with gaps. + * tree-vect-data-refs.c (vect_analyze_group_access): Mark the + loop as requiring an epilogue if there are gaps in the end of + the strided group. + +2011-05-29 Richard Sandiford <rdsandiford@googlemail.com> + + PR target/43700 + * config/mips/mips.c (mips_cfun_call_saved_reg_p): Handle global + registers. + +2011-05-29 Richard Sandiford <rdsandiford@googlemail.com> + + PR target/43995 + * config/mips/mips.c (mips_pic_call_symbol_from_set): Add a + recurse_p argument. Only follow register copies if it is set, + and prevent mips_find_pic_call_symbol from recursing. 
+ (mips_find_pic_call_symbol): Add a recurse_p argument. + Pass it to mips_pic_call_symbol_from_set. + (mips_annotate_pic_calls): Update accordingly. + +2011-05-26 Eric Botcazou <ebotcazou@adacore.com> + + * config/sparc/sparc-protos.h (sparc_optimization_options): Declare. + * config/sparc/sparc.h (OPTIMIZATION_OPTIONS): Define. + * config/sparc/sparc.c (sparc_optimization_options): New function. + Set flag_ira_share_save_slots to 0. + + Backport from mainline + 2011-01-21 Jeff Law <law@redhat.com> + + PR rtl-optimization/41619 + * caller-save.c (setup_save_areas): Break out code to determine + which hard regs are live across calls by examining the reload chains + so that it is always used. + Eliminate code which checked REG_N_CALLS_CROSSED. + +2011-05-25 Uros Bizjak <ubizjak@gmail.com> + + PR target/49133 + * config/i386/sse.md (sse2_loadhpd): Remove shufpd alternative. + +2011-05-21 Eric Botcazou <ebotcazou@adacore.com> + + * config/sparc/sparc.md (setjmp): Handle PIC mode and use the hard + frame pointer. + +2011-05-21 Eric Botcazou <ebotcazou@adacore.com> + + * config/sparc/sparc.c (eligible_for_return_delay): Do not return + false if there are call-saved registers here... + (sparc_can_use_return_insn_p): ...but here instead. + (save_or_restore_regs): Fix thinko. + (sparc_expand_prologue): Use current_function_is_leaf. + (sparc_frame_pointer_required): Likewise. + +2011-05-20 Nick Clifton <nickc@redhat.com> + + * config/rx/rx.h (HAVE_PRE_DECREMENT): Fix typo in macro name. 
+ 2011-05-16 Uros Bizjak <ubizjak@gmail.com> * config/i386/i386.md (*movxf_internal): Disable CONST_DOUBLE diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 7d77c0c575a..851d5ab3c4b 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20110519 +20110608 diff --git a/gcc/caller-save.c b/gcc/caller-save.c index 17fa028cb40..4d252d7495b 100644 --- a/gcc/caller-save.c +++ b/gcc/caller-save.c @@ -439,101 +439,93 @@ saved_hard_reg_compare_func (const void *v1p, const void *v2p) void setup_save_areas (void) { - int i, j, k; - unsigned int r; + int i, j, k, freq; HARD_REG_SET hard_regs_used; + struct saved_hard_reg *saved_reg; + rtx insn; + struct insn_chain *chain, *next; + unsigned int regno; + HARD_REG_SET hard_regs_to_save, used_regs, this_insn_sets; + reg_set_iterator rsi; - /* Allocate space in the save area for the largest multi-register - pseudos first, then work backwards to single register - pseudos. */ - - /* Find and record all call-used hard-registers in this function. */ CLEAR_HARD_REG_SET (hard_regs_used); - for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) - if (reg_renumber[i] >= 0 && REG_N_CALLS_CROSSED (i) > 0) - { - unsigned int regno = reg_renumber[i]; - unsigned int endregno - = end_hard_regno (GET_MODE (regno_reg_rtx[i]), regno); - for (r = regno; r < endregno; r++) - if (call_used_regs[r]) - SET_HARD_REG_BIT (hard_regs_used, r); - } - if (optimize && flag_ira_share_save_slots) + /* Find every CALL_INSN and record which hard regs are live across the + call into HARD_REG_MAP and HARD_REGS_USED. */ + initiate_saved_hard_regs (); + /* Create hard reg saved regs. 
*/ + for (chain = reload_insn_chain; chain != 0; chain = next) { - rtx insn, slot; - struct insn_chain *chain, *next; - char *saved_reg_conflicts; - unsigned int regno; - int next_k, freq; - struct saved_hard_reg *saved_reg, *saved_reg2, *saved_reg3; - int call_saved_regs_num; - struct saved_hard_reg *call_saved_regs[FIRST_PSEUDO_REGISTER]; - HARD_REG_SET hard_regs_to_save, used_regs, this_insn_sets; - reg_set_iterator rsi; - int best_slot_num; - int prev_save_slots_num; - rtx prev_save_slots[FIRST_PSEUDO_REGISTER]; - - initiate_saved_hard_regs (); - /* Create hard reg saved regs. */ - for (chain = reload_insn_chain; chain != 0; chain = next) + insn = chain->insn; + next = chain->next; + if (!CALL_P (insn) + || find_reg_note (insn, REG_NORETURN, NULL)) + continue; + freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)); + REG_SET_TO_HARD_REG_SET (hard_regs_to_save, + &chain->live_throughout); + COPY_HARD_REG_SET (used_regs, call_used_reg_set); + + /* Record all registers set in this call insn. These don't + need to be saved. N.B. the call insn might set a subreg + of a multi-hard-reg pseudo; then the pseudo is considered + live during the call, but the subreg that is set + isn't. */ + CLEAR_HARD_REG_SET (this_insn_sets); + note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); + /* Sibcalls are considered to set the return value. */ + if (SIBLING_CALL_P (insn) && crtl->return_rtx) + mark_set_regs (crtl->return_rtx, NULL_RTX, &this_insn_sets); + + AND_COMPL_HARD_REG_SET (used_regs, call_fixed_reg_set); + AND_COMPL_HARD_REG_SET (used_regs, this_insn_sets); + AND_HARD_REG_SET (hard_regs_to_save, used_regs); + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) + { + if (hard_reg_map[regno] != NULL) + hard_reg_map[regno]->call_freq += freq; + else + saved_reg = new_saved_hard_reg (regno, freq); + SET_HARD_REG_BIT (hard_regs_used, regno); + } + /* Look through all live pseudos, mark their hard registers. 
*/ + EXECUTE_IF_SET_IN_REG_SET + (&chain->live_throughout, FIRST_PSEUDO_REGISTER, regno, rsi) { - insn = chain->insn; - next = chain->next; - if (!CALL_P (insn) - || find_reg_note (insn, REG_NORETURN, NULL)) - continue; - freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)); - REG_SET_TO_HARD_REG_SET (hard_regs_to_save, - &chain->live_throughout); - COPY_HARD_REG_SET (used_regs, call_used_reg_set); + int r = reg_renumber[regno]; + int bound; - /* Record all registers set in this call insn. These don't - need to be saved. N.B. the call insn might set a subreg - of a multi-hard-reg pseudo; then the pseudo is considered - live during the call, but the subreg that is set - isn't. */ - CLEAR_HARD_REG_SET (this_insn_sets); - note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); - /* Sibcalls are considered to set the return value. */ - if (SIBLING_CALL_P (insn) && crtl->return_rtx) - mark_set_regs (crtl->return_rtx, NULL_RTX, &this_insn_sets); + if (r < 0) + continue; - AND_COMPL_HARD_REG_SET (used_regs, call_fixed_reg_set); - AND_COMPL_HARD_REG_SET (used_regs, this_insn_sets); - AND_HARD_REG_SET (hard_regs_to_save, used_regs); - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) + bound = r + hard_regno_nregs[r][PSEUDO_REGNO_MODE (regno)]; + for (; r < bound; r++) + if (TEST_HARD_REG_BIT (used_regs, r)) { - if (hard_reg_map[regno] != NULL) - hard_reg_map[regno]->call_freq += freq; + if (hard_reg_map[r] != NULL) + hard_reg_map[r]->call_freq += freq; else - saved_reg = new_saved_hard_reg (regno, freq); + saved_reg = new_saved_hard_reg (r, freq); + SET_HARD_REG_BIT (hard_regs_to_save, r); + SET_HARD_REG_BIT (hard_regs_used, r); } - /* Look through all live pseudos, mark their hard registers. 
*/ - EXECUTE_IF_SET_IN_REG_SET - (&chain->live_throughout, FIRST_PSEUDO_REGISTER, regno, rsi) - { - int r = reg_renumber[regno]; - int bound; + } + } - if (r < 0) - continue; + /* If requested, figure out which hard regs can share save slots. */ + if (optimize && flag_ira_share_save_slots) + { + rtx slot; + char *saved_reg_conflicts; + int next_k; + struct saved_hard_reg *saved_reg2, *saved_reg3; + int call_saved_regs_num; + struct saved_hard_reg *call_saved_regs[FIRST_PSEUDO_REGISTER]; + int best_slot_num; + int prev_save_slots_num; + rtx prev_save_slots[FIRST_PSEUDO_REGISTER]; - bound = r + hard_regno_nregs[r][PSEUDO_REGNO_MODE (regno)]; - for (; r < bound; r++) - if (TEST_HARD_REG_BIT (used_regs, r)) - { - if (hard_reg_map[r] != NULL) - hard_reg_map[r]->call_freq += freq; - else - saved_reg = new_saved_hard_reg (r, freq); - SET_HARD_REG_BIT (hard_regs_to_save, r); - } - } - } /* Find saved hard register conflicts. */ saved_reg_conflicts = (char *) xmalloc (saved_regs_num * saved_regs_num); memset (saved_reg_conflicts, 0, saved_regs_num * saved_regs_num); @@ -691,8 +683,10 @@ setup_save_areas (void) } else { - /* Now run through all the call-used hard-registers and allocate - space for them in the caller-save area. Try to allocate space + /* We are not sharing slots. + + Run through all the call-used hard-registers and allocate + space for each in the caller-save area. Try to allocate space in a manner which allows multi-register saves/restores to be done. 
*/ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 73921191605..283860d3bbd 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4964,24 +4964,22 @@ ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "sse2_loadhpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o") + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o") (vec_concat:V2DF (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0") + (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,0,0") (parallel [(const_int 0)])) - (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))] + (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ movhpd\t{%2, %0|%0, %2} unpcklpd\t{%2, %0|%0, %2} - shufpd\t{$1, %1, %0|%0, %1, 1} # # #" - [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov") - (set_attr "prefix_data16" "1,*,*,*,*,*") - (set_attr "length_immediate" "*,*,1,*,*,*") - (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")]) + [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov") + (set_attr "prefix_data16" "1,*,*,*,*") + (set_attr "mode" "V1DF,V2DF,DF,DF,DF")]) (define_split [(set (match_operand:V2DF 0 "memory_operand" "") diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 0207ffeae3f..fd9e4b39c59 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -1162,7 +1162,7 @@ static const struct mips_rtx_cost_data mips_rtx_cost_data[PROCESSOR_MAX] = { } }; -static rtx mips_find_pic_call_symbol (rtx, rtx); +static rtx mips_find_pic_call_symbol (rtx, rtx, bool); /* This hash table keeps track of implicit "mips16" and "nomips16" attributes for -mflip_mips16. It maps decl names onto a boolean mode setting. 
*/ @@ -9007,6 +9007,11 @@ mips_interrupt_extra_call_saved_reg_p (unsigned int regno) static bool mips_cfun_call_saved_reg_p (unsigned int regno) { + /* If the user makes an ordinarily-call-saved register global, + that register is no longer call-saved. */ + if (global_regs[regno]) + return false; + /* Interrupt handlers need to save extra registers. */ if (cfun->machine->interrupt_handler_p && mips_interrupt_extra_call_saved_reg_p (regno)) @@ -14040,12 +14045,16 @@ mips_call_expr_from_insn (rtx insn, rtx *second_call) } /* REG is set in DEF. See if the definition is one of the ways we load a - register with a symbol address for a mips_use_pic_fn_addr_reg_p call. If - it is return the symbol reference of the function, otherwise return - NULL_RTX. */ + register with a symbol address for a mips_use_pic_fn_addr_reg_p call. + If it is, return the symbol reference of the function, otherwise return + NULL_RTX. + + If RECURSE_P is true, use mips_find_pic_call_symbol to interpret + the values of source registers, otherwise treat such registers as + having an unknown value. */ static rtx -mips_pic_call_symbol_from_set (df_ref def, rtx reg) +mips_pic_call_symbol_from_set (df_ref def, rtx reg, bool recurse_p) { rtx def_insn, set; @@ -14072,21 +14081,39 @@ mips_pic_call_symbol_from_set (df_ref def, rtx reg) return symbol; } - /* Follow simple register copies. */ - if (REG_P (src)) - return mips_find_pic_call_symbol (def_insn, src); + /* Follow at most one simple register copy. Such copies are + interesting in cases like: + + for (...) + { + locally_binding_fn (...); + } + + and: + + locally_binding_fn (...); + ... + locally_binding_fn (...); + + where the load of locally_binding_fn can legitimately be + hoisted or shared. However, we do not expect to see complex + chains of copies, so a full worklist solution to the problem + would probably be overkill. 
*/ + if (recurse_p && REG_P (src)) + return mips_find_pic_call_symbol (def_insn, src, false); } return NULL_RTX; } -/* Find the definition of the use of REG in INSN. See if the definition is - one of the ways we load a register with a symbol address for a - mips_use_pic_fn_addr_reg_p call. If it is return the symbol reference of - the function, otherwise return NULL_RTX. */ +/* Find the definition of the use of REG in INSN. See if the definition + is one of the ways we load a register with a symbol address for a + mips_use_pic_fn_addr_reg_p call. If it is return the symbol reference + of the function, otherwise return NULL_RTX. RECURSE_P is as for + mips_pic_call_symbol_from_set. */ static rtx -mips_find_pic_call_symbol (rtx insn, rtx reg) +mips_find_pic_call_symbol (rtx insn, rtx reg, bool recurse_p) { df_ref use; struct df_link *defs; @@ -14098,7 +14125,7 @@ mips_find_pic_call_symbol (rtx insn, rtx reg) defs = DF_REF_CHAIN (use); if (!defs) return NULL_RTX; - symbol = mips_pic_call_symbol_from_set (defs->ref, reg); + symbol = mips_pic_call_symbol_from_set (defs->ref, reg, recurse_p); if (!symbol) return NULL_RTX; @@ -14107,7 +14134,7 @@ mips_find_pic_call_symbol (rtx insn, rtx reg) { rtx other; - other = mips_pic_call_symbol_from_set (defs->ref, reg); + other = mips_pic_call_symbol_from_set (defs->ref, reg, recurse_p); if (!rtx_equal_p (symbol, other)) return NULL_RTX; } @@ -14178,7 +14205,7 @@ mips_annotate_pic_calls (void) if (!REG_P (reg)) continue; - symbol = mips_find_pic_call_symbol (insn, reg); + symbol = mips_find_pic_call_symbol (insn, reg, true); if (symbol) { mips_annotate_pic_call_expr (call, symbol); diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h index 215f394fe3e..217789fcea7 100644 --- a/gcc/config/rx/rx.h +++ b/gcc/config/rx/rx.h @@ -168,7 +168,7 @@ extern enum rx_cpu_types rx_cpu_type; #define HANDLE_PRAGMA_PACK_PUSH_POP 1 -#define HAVE_PRE_DECCREMENT 1 +#define HAVE_PRE_DECREMENT 1 #define HAVE_POST_INCREMENT 1 #define MOVE_RATIO(SPEED) 
((SPEED) ? 4 : 2) diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index f06f4fd83d0..31a8ad645af 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -1945,7 +1945,10 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) else if (op2h != CONST0_RTX (SImode)) msw_taken = LTU; else - break; + { + msw_skip = swap_condition (LTU); + break; + } msw_skip = swap_condition (msw_taken); } break; @@ -1998,6 +2001,13 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) { operands[1] = op1h; operands[2] = op2h; + if (reload_completed + && ! arith_reg_or_0_operand (op2h, SImode) + && (true_regnum (op1h) || (comparison != EQ && comparison != NE))) + { + emit_move_insn (scratch, operands[2]); + operands[2] = scratch; + } } operands[3] = skip_label = gen_label_rtx (); diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h index fc6a990dc26..88aca6fa431 100644 --- a/gcc/config/sparc/sparc-protos.h +++ b/gcc/config/sparc/sparc-protos.h @@ -48,6 +48,7 @@ extern bool sparc_can_use_return_insn_p (void); extern int check_pic (int); extern int short_branch (int, int); extern void sparc_profile_hook (int); +extern void sparc_optimization_options (int, int); extern void sparc_override_options (void); extern void sparc_output_scratch_registers (FILE *); diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 33b0234a332..f812e3cc1b5 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -627,6 +627,17 @@ sparc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) return true; } +/* Specify default optimizations. */ + +void +sparc_optimization_options (int l ATTRIBUTE_UNUSED, int s ATTRIBUTE_UNUSED) +{ + /* Disable save slot sharing for call-clobbered registers by default. + The IRA sharing algorithm works on single registers only and this + pessimizes for double floating-point registers. 
*/ + flag_ira_share_save_slots = 0; +} + /* Validate and override various options, and do some machine dependent initialization. */ @@ -2767,11 +2778,6 @@ eligible_for_return_delay (rtx trial) if (get_attr_length (trial) != 1) return 0; - /* If there are any call-saved registers, we should scan TRIAL if it - does not reference them. For now just make it easy. */ - if (num_gfregs) - return 0; - /* If the function uses __builtin_eh_return, the eh_return machinery occupies the delay slot. */ if (crtl->calls_eh_return) @@ -4093,7 +4099,7 @@ save_or_restore_regs (int low, int high, rtx base, int offset, int action) emit_move_insn (gen_rtx_REG (mode, regno), mem); /* Always preserve double-word alignment. */ - offset = (offset + 7) & -8; + offset = (offset + 8) & -8; } } @@ -4200,7 +4206,7 @@ sparc_expand_prologue (void) example, the regrename pass has special provisions to not rename to non-leaf registers in a leaf function. */ sparc_leaf_function_p - = optimize > 0 && leaf_function_p () && only_leaf_regs_used (); + = optimize > 0 && current_function_is_leaf && only_leaf_regs_used (); /* Need to use actual_fsize, since we are also allocating space for our callee (and our own register save area). */ @@ -4324,6 +4330,7 @@ bool sparc_can_use_return_insn_p (void) { return sparc_prologue_data_valid_p + && num_gfregs == 0 && (actual_fsize == 0 || !sparc_leaf_function_p); } @@ -4425,18 +4432,20 @@ output_return (rtx insn) machinery occupies the delay slot. */ gcc_assert (! final_sequence); - if (! 
flag_delayed_branch) - fputs ("\tadd\t%fp, %g1, %fp\n", asm_out_file); - - if (TARGET_V9) - fputs ("\treturn\t%i7+8\n", asm_out_file); - else - fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file); + if (flag_delayed_branch) + { + if (TARGET_V9) + fputs ("\treturn\t%i7+8\n", asm_out_file); + else + fputs ("\trestore\n\tjmp\t%o7+8\n", asm_out_file); - if (flag_delayed_branch) - fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file); + fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file); + } else - fputs ("\t nop\n", asm_out_file); + { + fputs ("\trestore\n\tadd\t%sp, %g1, %sp\n", asm_out_file); + fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file); + } } else if (final_sequence) { @@ -9267,7 +9276,7 @@ sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval) bool sparc_frame_pointer_required (void) { - return !(leaf_function_p () && only_leaf_regs_used ()); + return !(current_function_is_leaf && only_leaf_regs_used ()); } /* The way this is structured, we can't eliminate SFP in favor of SP diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 470ca9f4efd..488a3e6c23e 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -501,7 +501,8 @@ extern enum cmodel sparc_cmodel; #define CAN_DEBUG_WITHOUT_FP /* Option handling. */ - +#define OPTIMIZATION_OPTIONS(LEVEL, SIZE) \ + sparc_optimization_options ((LEVEL), (SIZE)) #define OVERRIDE_OPTIONS sparc_override_options () /* Mask of all CPU selection flags. 
*/ diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 5c2db0399eb..1b4fd8d3dcb 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -6491,8 +6491,8 @@ (const_int 4)))]) ;; For __builtin_setjmp we need to flush register windows iff the function -;; calls alloca as well, because otherwise the register window might be -;; saved after %sp adjustment and thus setjmp would crash +;; calls alloca as well, because otherwise the current register window might +;; be saved after the %sp adjustment and thus setjmp would crash. (define_expand "builtin_setjmp_setup" [(match_operand 0 "register_operand" "r")] "" @@ -6531,19 +6531,26 @@ (eq_attr "pic" "true") (const_int 4)] (const_int 3)))]) -;; Pattern for use after a setjmp to store FP and the return register -;; into the stack area. +;; Pattern for use after a setjmp to store registers into the save area. (define_expand "setjmp" [(const_int 0)] "" { rtx mem; - + + if (flag_pic) + { + mem = gen_rtx_MEM (Pmode, + plus_constant (stack_pointer_rtx, + SPARC_STACK_BIAS + 7 * UNITS_PER_WORD)); + emit_insn (gen_rtx_SET (VOIDmode, mem, pic_offset_table_rtx)); + } + mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx, SPARC_STACK_BIAS + 14 * UNITS_PER_WORD)); - emit_insn (gen_rtx_SET (VOIDmode, mem, frame_pointer_rtx)); + emit_insn (gen_rtx_SET (VOIDmode, mem, hard_frame_pointer_rtx)); mem = gen_rtx_MEM (Pmode, plus_constant (stack_pointer_rtx, diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index cf008b23b29..01332373aca 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,16 @@ +2011-05-31 Duncan Sands <baldrick@free.fr> + + Backported from 4.6 branch + 2011-03-09 Martin Jambor <mjambor@suse.cz> + + PR tree-optimization/47714 + * method.c (use_thunk): Clear addressable flag of thunk arguments. + +2011-05-20 Jason Merrill <jason@redhat.com> + + PR c++/48873 + * tree.c (stabilize_expr): Don't make gratuitous copies of classes. 
+ 2011-05-09 Jason Merrill <jason@redhat.com> PR c++/48936 diff --git a/gcc/cp/method.c b/gcc/cp/method.c index 5ed98bc93bd..088c9e7c1db 100644 --- a/gcc/cp/method.c +++ b/gcc/cp/method.c @@ -374,6 +374,7 @@ use_thunk (tree thunk_fndecl, bool emit_p) DECL_CONTEXT (x) = thunk_fndecl; SET_DECL_RTL (x, NULL_RTX); DECL_HAS_VALUE_EXPR_P (x) = 0; + TREE_ADDRESSABLE (x) = 0; t = x; } a = nreverse (t); diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c index 994056d9ca3..c80fa995634 100644 --- a/gcc/cp/tree.c +++ b/gcc/cp/tree.c @@ -2954,7 +2954,8 @@ stabilize_expr (tree exp, tree* initp) if (!TREE_SIDE_EFFECTS (exp)) init_expr = NULL_TREE; else if (!real_lvalue_p (exp) - || !TYPE_NEEDS_CONSTRUCTING (TREE_TYPE (exp))) + || (!TYPE_NEEDS_CONSTRUCTING (TREE_TYPE (exp)) + && !TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (exp)))) { init_expr = get_target_expr (exp); exp = TARGET_EXPR_SLOT (init_expr); diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 9ea57f4008e..4a9037b08bf 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,13 @@ +2011-06-02 Thomas Koenig <tkoenig@gcc.gnu.org> + + Backport from trunk + PR fortran/45786 + * interface.c (gfc_equivalent_op): New function. + (gfc_check_interface): Use gfc_equivalent_op instead + of switch statement. + * decl.c (access_attr_decl): Also set access to an + equivalent operator. + 2011-04-28 Release Manager * GCC 4.5.3 released. diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c index 692078a11d4..19fdede27c2 100644 --- a/gcc/fortran/decl.c +++ b/gcc/fortran/decl.c @@ -6062,8 +6062,19 @@ access_attr_decl (gfc_statement st) case INTERFACE_INTRINSIC_OP: if (gfc_current_ns->operator_access[op] == ACCESS_UNKNOWN) { + gfc_intrinsic_op other_op; + gfc_current_ns->operator_access[op] = (st == ST_PUBLIC) ? ACCESS_PUBLIC : ACCESS_PRIVATE; + + /* Handle the case if there is another op with the same + function, for INTRINSIC_EQ vs. INTRINSIC_EQ_OS and so on. 
*/ + other_op = gfc_equivalent_op (op); + + if (other_op != INTRINSIC_NONE) + gfc_current_ns->operator_access[other_op] = + (st == ST_PUBLIC) ? ACCESS_PUBLIC : ACCESS_PRIVATE; + } else { diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 3fcc5ccba7c..64d1de9ae43 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -2718,6 +2718,7 @@ void gfc_set_current_interface_head (gfc_interface *); gfc_symtree* gfc_find_sym_in_symtree (gfc_symbol*); bool gfc_arglist_matches_symbol (gfc_actual_arglist**, gfc_symbol*); bool gfc_check_operator_interface (gfc_symbol*, gfc_intrinsic_op, locus); +gfc_intrinsic_op gfc_equivalent_op (gfc_intrinsic_op); /* io.c */ extern gfc_st_label format_asterisk; diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c index d26edd68c56..ff081b4e6a3 100644 --- a/gcc/fortran/interface.c +++ b/gcc/fortran/interface.c @@ -1213,6 +1213,54 @@ check_uop_interfaces (gfc_user_op *uop) } } +/* Given an intrinsic op, return an equivalent op if one exists, + or INTRINSIC_NONE otherwise. 
*/ + +gfc_intrinsic_op +gfc_equivalent_op (gfc_intrinsic_op op) +{ + switch(op) + { + case INTRINSIC_EQ: + return INTRINSIC_EQ_OS; + + case INTRINSIC_EQ_OS: + return INTRINSIC_EQ; + + case INTRINSIC_NE: + return INTRINSIC_NE_OS; + + case INTRINSIC_NE_OS: + return INTRINSIC_NE; + + case INTRINSIC_GT: + return INTRINSIC_GT_OS; + + case INTRINSIC_GT_OS: + return INTRINSIC_GT; + + case INTRINSIC_GE: + return INTRINSIC_GE_OS; + + case INTRINSIC_GE_OS: + return INTRINSIC_GE; + + case INTRINSIC_LT: + return INTRINSIC_LT_OS; + + case INTRINSIC_LT_OS: + return INTRINSIC_LT; + + case INTRINSIC_LE: + return INTRINSIC_LE_OS; + + case INTRINSIC_LE_OS: + return INTRINSIC_LE; + + default: + return INTRINSIC_NONE; + } +} /* For the namespace, check generic, user operator and intrinsic operator interfaces for consistency and to remove duplicate @@ -1253,75 +1301,19 @@ gfc_check_interfaces (gfc_namespace *ns) for (ns2 = ns; ns2; ns2 = ns2->parent) { + gfc_intrinsic_op other_op; + if (check_interface1 (ns->op[i], ns2->op[i], 0, interface_name, true)) goto done; - switch (i) - { - case INTRINSIC_EQ: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_EQ_OS], - 0, interface_name, true)) goto done; - break; - - case INTRINSIC_EQ_OS: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_EQ], - 0, interface_name, true)) goto done; - break; - - case INTRINSIC_NE: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_NE_OS], - 0, interface_name, true)) goto done; - break; - - case INTRINSIC_NE_OS: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_NE], - 0, interface_name, true)) goto done; - break; - - case INTRINSIC_GT: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_GT_OS], - 0, interface_name, true)) goto done; - break; - - case INTRINSIC_GT_OS: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_GT], - 0, interface_name, true)) goto done; - break; - - case INTRINSIC_GE: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_GE_OS], - 0, interface_name, true)) goto done; - break; - 
- case INTRINSIC_GE_OS: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_GE], - 0, interface_name, true)) goto done; - break; - - case INTRINSIC_LT: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_LT_OS], - 0, interface_name, true)) goto done; - break; - - case INTRINSIC_LT_OS: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_LT], - 0, interface_name, true)) goto done; - break; - - case INTRINSIC_LE: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_LE_OS], - 0, interface_name, true)) goto done; - break; - - case INTRINSIC_LE_OS: - if (check_interface1 (ns->op[i], ns2->op[INTRINSIC_LE], - 0, interface_name, true)) goto done; - break; - - default: - break; - } + /* i should be gfc_intrinsic_op, but has to be int with this cast + here for stupid C++ compatibility rules. */ + other_op = gfc_equivalent_op ((gfc_intrinsic_op) i); + if (other_op != INTRINSIC_NONE + && check_interface1 (ns->op[i], ns2->op[other_op], + 0, interface_name, true)) + goto done; } } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 681b53cdb40..78cce68b97e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,52 @@ +2011-06-08 Kaz Kojima <kkojima@gcc.gnu.org> + + Backport from mainline + 2011-06-01 Kaz Kojima <kkojima@gcc.gnu.org> + + PR target/49238 + * gcc.c-torture/compile/pr49238.c: New. + +2011-06-05 Kaz Kojima <kkojima@gcc.gnu.org> + + Backport from mainline + 2011-05-30 Kaz Kojima <kkojima@gcc.gnu.org> + + PR target/49186 + * gcc.c-torture/execute/pr49186.c: New. + +2011-06-04 Ira Rosen <ira.rosen@linaro.org> + + PR tree-optimization/49038 + * gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c: New test. + * gcc.dg/vect/pr49038.c: New test. + +2011-06-02 Thomas Koenig <tkoenig@gcc.gnu.org> + + Backport from trunk + PR fortran/45786 + * gfortran.dg/operator_7.f90: New test case. 
+ +2011-05-31 Duncan Sands <baldrick@free.fr> + + Backported from 4.6 branch + 2011-03-09 Martin Jambor <mjambor@suse.cz> + + PR tree-optimization/47714 + * g++.dg/torture/pr47714.C: New test. + +2011-05-29 Richard Sandiford <rdsandiford@googlemail.com> + + * gcc.target/mips/reg-var-1.c: New test. + +2011-05-25 Uros Bizjak <ubizjak@gmail.com> + + PR target/49133 + * g++.dg/other/pr49133.C: New test. + +2011-05-20 Jason Merrill <jason@redhat.com> + + * g++.dg/init/new32.C: New. + 2011-05-19 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> * gcc.c-torture/execute/960321-1.x: Remove. diff --git a/gcc/testsuite/g++.dg/init/new32.C b/gcc/testsuite/g++.dg/init/new32.C new file mode 100644 index 00000000000..f82785749ce --- /dev/null +++ b/gcc/testsuite/g++.dg/init/new32.C @@ -0,0 +1,16 @@ +// PR c++/48873 + +#include <new> + +struct D { +private: + ~D(); +}; + +template<class T> +T& create(); + +void f() +{ + D* dp = new (((void*) 0)) D(create<D>()); // # +} diff --git a/gcc/testsuite/g++.dg/other/pr49133.C b/gcc/testsuite/g++.dg/other/pr49133.C new file mode 100644 index 00000000000..51d8475455e --- /dev/null +++ b/gcc/testsuite/g++.dg/other/pr49133.C @@ -0,0 +1,37 @@ +/* { dg-do run { target i?86-*-* x86_64-*-* } } */ +/* { dg-options "-O2 -msse2" } */ +/* { dg-require-effective-target sse2 } */ +/* { dg-require-effective-target sse2_runtime } */ + +#include <xmmintrin.h> + +extern "C" void abort (); + +typedef double double_a __attribute__((__may_alias__)); + +struct V +{ + __m128d data; +}; + +int +main() +{ + V a; + __m128d b; + + b = _mm_set_pd (1., 0.); + a.data = _mm_set_pd (1., 0.); + a.data = _mm_add_pd (a.data, + _mm_and_pd (_mm_cmpeq_pd (a.data, _mm_set1_pd (0.)), + _mm_set1_pd (2.))); + reinterpret_cast<double_a *>(&a.data)[1] += 1.; + b = _mm_add_pd (b, _mm_and_pd (_mm_cmpeq_pd (b, _mm_set1_pd (0.)), + _mm_set1_pd (1.))); + b = _mm_add_pd (b, _mm_and_pd (_mm_cmpeq_pd (b, _mm_set1_pd (1.)), + _mm_set1_pd (1.))); + if (_mm_movemask_pd (_mm_cmpeq_pd (a.data, b)) != 
0x3) + abort(); + + return 0; +} diff --git a/gcc/testsuite/g++.dg/torture/pr47714.C b/gcc/testsuite/g++.dg/torture/pr47714.C new file mode 100644 index 00000000000..4ff2eeef0a4 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr47714.C @@ -0,0 +1,16 @@ +struct A { virtual ~A () {} }; +struct B { virtual ~B () {} }; +struct C { virtual const A *foo (int) const = 0; }; +struct E : public B, public A { }; +struct F : public C +{ + virtual const E *foo (int) const; +}; +void bar (int &); + +const E * +F::foo (int x) const +{ + bar (x); + return __null; +} diff --git a/gcc/testsuite/gcc.c-torture/compile/pr49238.c b/gcc/testsuite/gcc.c-torture/compile/pr49238.c new file mode 100644 index 00000000000..fd8443a8f08 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr49238.c @@ -0,0 +1,18 @@ +/* PR target/49238 */ +extern int bar (void); + +void +foo (unsigned long long a, int b) +{ + int i; + + if (b) + for (a = -12; a >= 10; a = bar ()) + break; + else + return; + + for (i = 0; i < 10; i += 10) + if ((i == bar ()) | (bar () >= a)) + bar (); +} diff --git a/gcc/testsuite/gcc.c-torture/execute/pr49186.c b/gcc/testsuite/gcc.c-torture/execute/pr49186.c new file mode 100644 index 00000000000..743815b9802 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr49186.c @@ -0,0 +1,15 @@ +/* PR target/49186 */ +extern void abort (void); + +int +main () +{ + int x; + unsigned long long uv = 0x1000000001ULL; + + x = (uv < 0x80) ? 1 : ((uv < 0x800) ? 
2 : 3); + if (x != 3) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr49038.c b/gcc/testsuite/gcc.dg/vect/pr49038.c new file mode 100644 index 00000000000..91c214ffd4c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr49038.c @@ -0,0 +1,42 @@ +#include <sys/mman.h> +#include <stdio.h> + +#define COUNT 320 +#define MMAP_SIZE 0x10000 +#define ADDRESS 0x1122000000 +#define TYPE unsigned short + +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +void __attribute__((noinline)) +foo (TYPE *__restrict a, TYPE *__restrict b) +{ + int n; + + for (n = 0; n < COUNT; n++) + a[n] = b[n * 2]; +} + +int +main (void) +{ + void *x; + size_t b_offset; + + x = mmap ((void *) ADDRESS, MMAP_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (x == MAP_FAILED) + { + perror ("mmap"); + return 1; + } + + b_offset = MMAP_SIZE - (2 * COUNT - 1) * sizeof (TYPE); + foo ((unsigned short *) x, + (unsigned short *) ((char *) x + b_offset)); + return 0; +} + +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c new file mode 100644 index 00000000000..45066a053a2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4-unknown.c @@ -0,0 +1,116 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include <stdio.h> +#include "tree-vect.h" + +#define N 160 + +typedef struct { + unsigned char a; + unsigned char b; + unsigned char c; + unsigned char d; + unsigned char e; + unsigned char f; + unsigned char g; + unsigned char h; +} s; + +__attribute__ ((noinline)) int +main1 (s *arr, int n) +{ + int i; + s *ptr = arr; + s res[N]; + unsigned char x; + + for (i = 0; i < N; i++) + { + res[i].a = 0; + res[i].b = 0; + res[i].c = 0; + res[i].d = 0; + res[i].e = 0; + res[i].f = 0; + res[i].g = 0; + res[i].h = 0; + __asm__ volatile (""); + } + + /* Check peeling for gaps for unknown loop 
bound. */ + for (i = 0; i < n; i++) + { + res[i].c = ptr->b + ptr->c; + x = ptr->c + ptr->f; + res[i].a = x + ptr->b; + res[i].d = ptr->b + ptr->c; + res[i].b = ptr->c; + res[i].f = ptr->f + ptr->e; + res[i].e = ptr->b + ptr->e; + res[i].h = ptr->c; + res[i].g = ptr->b + ptr->c; + ptr++; + } + + /* check results: */ + for (i = 0; i < n; i++) + { + if (res[i].c != arr[i].b + arr[i].c + || res[i].a != arr[i].c + arr[i].f + arr[i].b + || res[i].d != arr[i].b + arr[i].c + || res[i].b != arr[i].c + || res[i].f != arr[i].f + arr[i].e + || res[i].e != arr[i].b + arr[i].e + || res[i].h != arr[i].c + || res[i].g != arr[i].b + arr[i].c) + abort (); + } + + /* Check also that we don't do more iterations than needed. */ + for (i = n; i < N; i++) + { + if (res[i].c == arr[i].b + arr[i].c + || res[i].a == arr[i].c + arr[i].f + arr[i].b + || res[i].d == arr[i].b + arr[i].c + || res[i].b == arr[i].c + || res[i].f == arr[i].f + arr[i].e + || res[i].e == arr[i].b + arr[i].e + || res[i].h == arr[i].c + || res[i].g == arr[i].b + arr[i].c) + abort (); + } + + return 0; +} + + +int main (void) +{ + int i; + s arr[N]; + + check_vect (); + + for (i = 0; i < N; i++) + { + arr[i].a = 5; + arr[i].b = 6; + arr[i].c = 17; + arr[i].d = 3; + arr[i].e = 16; + arr[i].f = 16; + arr[i].g = 3; + arr[i].h = 56; + if (arr[i].a == 178) + abort(); + } + + main1 (arr, N-2); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave && vect_extract_even_odd } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/testsuite/gcc.target/mips/reg-var-1.c b/gcc/testsuite/gcc.target/mips/reg-var-1.c new file mode 100644 index 00000000000..d8b8118103a --- /dev/null +++ b/gcc/testsuite/gcc.target/mips/reg-var-1.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +register int g asm ("$18"); + +void __attribute__((noinline)) +test (void) +{ + g = g + 1; +} + +int +main (void) +{ + g = 2; + test (); + return g != 3; +} diff --git 
a/gcc/testsuite/gfortran.dg/operator_7.f90 b/gcc/testsuite/gfortran.dg/operator_7.f90 new file mode 100644 index 00000000000..66d8dd187ee --- /dev/null +++ b/gcc/testsuite/gfortran.dg/operator_7.f90 @@ -0,0 +1,27 @@ +! { dg-do compile } +! PR fortran/45786 - operators were not correctly marked as public +! if the alternative form was used. +! Test case contributed by Neil Carlson. +module foo_type + private + public :: foo, operator(==) + type :: foo + integer :: bar + end type + interface operator(.eq.) + module procedure eq_foo + end interface +contains + logical function eq_foo (a, b) + type(foo), intent(in) :: a, b + eq_foo = (a%bar == b%bar) + end function +end module + + subroutine use_it (a, b) + use foo_type + type(foo) :: a, b + print *, a == b +end subroutine + +! { dg-final { cleanup-modules "foo_type" } } diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index a4d6a5f9bc4..2e0ddb04eae 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -1450,7 +1450,7 @@ vect_analyze_group_access (struct data_reference *dr) loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); - HOST_WIDE_INT stride; + HOST_WIDE_INT stride, last_accessed_element = 1; bool slp_impossible = false; /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the @@ -1479,6 +1479,16 @@ vect_analyze_group_access (struct data_reference *dr) fprintf (vect_dump, " step "); print_generic_expr (vect_dump, step, TDF_SLIM); } + + if (loop_vinfo) + { + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; + + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Data access with gaps requires scalar " + "epilogue loop"); + } + return true; } if (vect_print_dump_info (REPORT_DETAILS)) @@ -1531,6 +1541,7 @@ vect_analyze_group_access (struct data_reference *dr) next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); continue; } + prev = 
next; /* Check that all the accesses have the same STEP. */ @@ -1561,6 +1572,8 @@ vect_analyze_group_access (struct data_reference *dr) gaps += diff - 1; } + last_accessed_element += diff; + /* Store the gap from the previous member of the group. If there is no gap in the access, DR_GROUP_GAP is always 1. */ DR_GROUP_GAP (vinfo_for_stmt (next)) = diff; @@ -1652,6 +1665,15 @@ vect_analyze_group_access (struct data_reference *dr) VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo), stmt); } + + /* There is a gap in the end of the group. */ + if (stride - last_accessed_element > 0 && loop_vinfo) + { + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Data access with gaps requires scalar " + "epilogue loop"); + } } return true; diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index f4056b05891..568e640f601 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -1516,7 +1516,7 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, edge pe; basic_block new_bb; gimple_seq stmts; - tree ni_name; + tree ni_name, ni_minus_gap_name; tree var; tree ratio_name; tree ratio_mult_vf_name; @@ -1533,9 +1533,39 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list); log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); + /* If epilogue loop is required because of data accesses with gaps, we + subtract one iteration from the total number of iterations here for + correct calculation of RATIO. 
*/ + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) + { + ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name), + ni_name, + build_one_cst (TREE_TYPE (ni_name))); + if (!is_gimple_val (ni_minus_gap_name)) + { + var = create_tmp_var (TREE_TYPE (ni), "ni_gap"); + add_referenced_var (var); + + stmts = NULL; + ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts, + true, var); + if (cond_expr_stmt_list) + gimple_seq_add_seq (&cond_expr_stmt_list, stmts); + else + { + pe = loop_preheader_edge (loop); + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); + gcc_assert (!new_bb); + } + } + } + else + ni_minus_gap_name = ni_name; + /* Create: ratio = ni >> log2(vf) */ - ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf); + ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name), + ni_minus_gap_name, log_vf); if (!is_gimple_val (ratio_name)) { var = create_tmp_var (TREE_TYPE (ni), "bnd"); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index c8ad3a63154..ad5d5d37821 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -711,6 +711,7 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10); LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10); LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1; + LOOP_VINFO_PEELING_FOR_GAPS (res) = false; return res; } @@ -2053,6 +2054,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) peel_iters_prologue = niters < peel_iters_prologue ? niters : peel_iters_prologue; peel_iters_epilogue = (niters - peel_iters_prologue) % vf; + /* If we need to peel for gaps, but no peeling is required, we have + to peel VF iterations. 
*/ + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !peel_iters_epilogue) + peel_iters_epilogue = vf; } } @@ -4212,7 +4217,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) do_peeling_for_loop_bound = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)); + && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0) + || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)); if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index c3ff0584025..bba771e565f 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -242,6 +242,12 @@ typedef struct _loop_vec_info { /* The unrolling factor needed to SLP the loop. In case of that pure SLP is applied to the loop, i.e., no unrolling is needed, this is 1. */ unsigned slp_unrolling_factor; + + /* When we have strided data accesses with gaps, we may introduce invalid + memory accesses. We peel the last iteration of the loop to prevent + this. */ + bool peeling_for_gaps; + } *loop_vec_info; /* Access Functions. */ @@ -266,6 +272,7 @@ typedef struct _loop_vec_info { #define LOOP_VINFO_STRIDED_STORES(L) (L)->strided_stores #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor +#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ VEC_length (gimple, (L)->may_misalign_stmts) > 0 |