diff options
author | Vladimir Makarov <vmakarov@redhat.com> | 2006-05-31 16:41:12 +0000 |
---|---|---|
committer | Vladimir Makarov <vmakarov@redhat.com> | 2006-05-31 16:41:12 +0000 |
commit | 9226cd47b4fe0d8d5832cce30364c4f7303003f9 (patch) | |
tree | f0c2599086f15c5eb9477d8a5e8001d9902855d0 | |
parent | cd8af4a4513cb7e87226de7b464f339d62213fe7 (diff) |
2006-05-31 Vladimir Makarov <vmakarov@redhat.com>,
Richard Henderson <rth@redhat.com>
* cgraph.c (cgraph_create_node): Initialize function_used_regs.
* cgraph.h (cgraph_node): Add new member function_used_regs.
* regrename.c (copyprop_hardreg_forward_1): Use
get_call_invalidated_used_regs.
* postreload-gcse.c (record_opr_changes,
reg_set_between_after_reload_p, reg_used_between_after_reload_p):
Ditto.
* postreload.c (reload_combine, reload_cse_move2add): Ditto.
* rtlanal.c (reg_set_p): Ditto.
* flow.c (propagate_one_insn): Ditto.
* df-scan.c (df_insn_refs_record): Ditto.
* caller-save.c (save_call_clobbered_regs): Ditto.
* gcse.c (compute_hash_table_work, compute_store_table): Ditto.
* cselib.c (cselib_process_insn): Ditto.
* loop-iv.c (simplify_using_assignment): Ditto.
* sched-deps.c (sched_analyze): Ditto.
* combine.c (record_dead_and_set_regs): Ditto.
* resource.c (mark_set_resources, mark_target_live_regs): Ditto.
* var-tracking.c (compute_bb_dataflow, emit_notes_in_bb): Ditto.
* cse.c (invalidate_for_call): Ditto. Add parameter.
(cse_insn, invalidate_skipped_block): Pass the insn for
invalidate_for_call.
* tree-pass.h (pass_lower_subreg): New external definition.
* final.c (rest_of_handle_final): Set up call_used_regs for the
cgraph node.
* toplev.h (flag_lower_subreg, flag_ipra): New external
definitions.
* rtl.def (CONCATN): New rtl expression.
* dwarf2out.c (concatn_loc_descriptor): New function.
(loc_descriptor): Process CONCATN.
* opts.c (flag_ipra, flag_lower_subreg): Set up for -O2.
* timevar.def (TV_LOWER_SUBREG): New definition.
* recog.c (peep2_find_free_register): Set up regs_ever_live.
* yara-int.h (allocno_common): Add new member clobbered_regs.
(ALLOCNO_CLOBBERED_REGS): New macro.
(can): Remove member spill_p.
(CAN_SPILL_P): Remove.
* function.c (get_call, get_call_invalidated_used_regs): New
functions.
* function.h (emit_status): Add new member call_used_regs.
(get_call_invalidated_used_regs): New external definition.
* yara-color.c (allocated_reg_p): New array.
(choose_global_hard_reg): Modify cost for cans crossing functions
and saved cans.
(pop_globals_from_stack): Initialize allocated_reg_p.
(assign_global_can_allocnos): Use ALLOCNO_CLOBBERED_REGS.
* emit-rtl.c (gen_reg_rtx_offset): New function.
(gen_lowpart_common): Process CONCATN.
* simplify-rtx.c (simplify_subreg): Process CONCATN.
* common.opt (fipra, flower-subreg): New options.
* yara.c (yara): Reset call_used_regs.
* rtl.h (gen_reg_rtx_offset): New external definition.
* yara-trans.c (check_hard_regno_for_a, assign_one_allocno): Use
ALLOCNO_CLOBBERED_REGS.
* yara-ir.c (setup_reg_class_nregs): Use CLASS_MAX_NREGS.
(create_allocno): Clear ALLOCNO_CLOBBERED_REGS.
(print_allocno): Print ALLOCNO_CLOBBERED_REGS.
(curr_call_used_function_regs): New static variable.
(set_call_info): Update curr_call_used_function_regs.
(build_insn_allocno_conflicts): Use
get_call_invalidated_used_regs.
(create_can, print_can): Remove CAN_SPILL_P.
* Makefile.in (OBJS-common): Add lower-subreg.o.
(lower-subreg.o): New entry.
(function.o): Add cgraph.h.
(yara.o, yara-ir.o): Add cgraph.h and function.h.
* passes.c (pass_lower_subreg): Add new pass.
* lower-subreg.c: New file.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/yara-branch@114273 138bc75d-0d04-0410-961f-82ee72b054a4
40 files changed, 1246 insertions, 105 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1ea3fa4e262..760d8aa6916 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,113 @@ +2006-05-31 Vladimir Makarov <vmakarov@redhat.com>, + Richard Henderson <rth@redhat.com> + + * cgraph.c (cgraph_create_node): Initialize function_used_regs. + + * cgraph.h (cgraph_node): Add new member function_used_regs. + + * regrename.c (copyprop_hardreg_forward_1): Use + get_call_invalidated_used_regs. + + * postreload-gcse.c (record_opr_changes, + reg_set_between_after_reload_p, reg_used_between_after_reload_p): + Ditto. + + * postreload.c (reload_combine, reload_cse_move2add): Ditto. + + * rtlanal.c (reg_set_p): Ditto. + + * flow.c (propagate_one_insn): Ditto. + + * df-scan.c (df_insn_refs_record): Ditto. + + * caller-save.c (save_call_clobbered_regs): Ditto. + + * gcse.c (compute_hash_table_work, compute_store_table): Ditto. + + * cselib.c (cselib_process_insn): Ditto. + + * loop-iv.c (simplify_using_assignment): Ditto. + + * sched-deps.c (sched_analyze): Ditto. + + * combine.c (record_dead_and_set_regs): Ditto. + + * resource.c (mark_set_resources, mark_target_live_regs): Ditto. + + * var-tracking.c (compute_bb_dataflow, emit_notes_in_bb): Ditto. + + * cse.c (invalidate_for_call): Ditto. Add parameter. + (cse_insn, invalidate_skipped_block): Pass the insn for + invalidate_for_call. + + * tree-pass.h (pass_lower_subreg): New external definition. + + * final.c (rest_of_handle_final): Set up call_used_regs for the + cgraph node. + + * toplev.h (flag_lower_subreg, flag_ipra): New external + definitions. + + * rtl.def (CONCATN): New rtl expression. + + * dwarf2out.c (concatn_loc_descriptor): New function. + (loc_descriptor): Process CONCATN. + + * opts.c (flag_ipra, flag_lower_subreg): Set up for -O2. + + * timevar.def (TV_LOWER_SUBREG): New definition. + + * recog.c (peep2_find_free_register): Set up regs_ever_live. + + * yara-int.h (allocno_common): Add new member clobbered_regs. + (ALLOCNO_CLOBBERED_REGS): New macro. 
+ (can): Remove member spill_p. + (CAN_SPILL_P): Remove. + + * function.c (get_call, get_call_invalidated_used_regs): New + functions. + + * function.h (emit_status): Add new member call_used_regs. + (get_call_invalidated_used_regs): New external definition. + + * yara-color.c (allocated_reg_p): New array. + (choose_global_hard_reg): Modify cost for cans crossing functions + and saved cans. + (pop_globals_from_stack): Initialize allocated_reg_p. + (assign_global_can_allocnos): Use ALLOCNO_CLOBBERED_REGS. + + * emit-rtl.c (gen_reg_rtx_offset): New function. + (gen_lowpart_common): Process CONCATN. + + * simplify-rtx.c (simplify_subreg): Process CONCATN. + + * common.opt (fipra, flower-subreg): New options. + + * yara.c (yara): Reset call_used_regs. + + * rtl.h (gen_reg_rtx_offset): New external definition. + + * yara-trans.c (check_hard_regno_for_a, assign_one_allocno): Use + ALLOCNO_CLOBBERED_REGS. + + * yara-ir.c (setup_reg_class_nregs): Use CLASS_MAX_NREGS. + (create_allocno): Clear ALLOCNO_CLOBBERED_REGS. + (print_allocno): Print ALLOCNO_CLOBBERED_REGS.. + (curr_call_used_function_regs): New static variable. + (set_call_info): Update curr_call_used_function_regs. + (build_insn_allocno_conflicts): Use + get_call_invalidated_used_regs. + (create_can, print_can): Remove CAN_SPILL_P. + + * Makefile.in (OBJS-common): Add lower-subreg.o. + (lower-subreg.o): New entry. + (function.o): Add cgraph.h. + (yara.o, yara-ir.o): Add cgraph.h and function.h. + + * passes.c (pass_lower_subreg): Add new pass. + + * lower-subreg.c: New file. + 2006-05-19 Vladimir Makarov <vmakarov@redhat.com> * yara-int.h (HAVE_ANY_SECONDARY_MOVES): Remove. 
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 9f0846bc97d..3acf6627766 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -751,7 +751,7 @@ RECOG_H = recog.h ALIAS_H = alias.h EMIT_RTL_H = emit-rtl.h FLAGS_H = flags.h options.h -FUNCTION_H = function.h $(TREE_H) +FUNCTION_H = function.h $(TREE_H) hard-reg-set.h EXPR_H = expr.h insn-config.h $(FUNCTION_H) $(RTL_H) $(FLAGS_H) $(TREE_H) $(MACHMODE_H) $(EMIT_RTL_H) OPTABS_H = optabs.h insn-codes.h REGS_H = regs.h varray.h $(MACHMODE_H) $(OBSTACK_H) $(BASIC_BLOCK_H) $(FUNCTION_H) @@ -997,7 +997,8 @@ OBJS-common = \ lambda-trans.o lambda-code.o tree-loop-linear.o tree-ssa-sink.o \ tree-vrp.o tree-stdarg.o tree-cfgcleanup.o tree-ssa-reassoc.o \ tree-ssa-structalias.o tree-object-size.o rtl-factoring.o \ - yara.o yara-ir.o yara-trans.o yara-insn.o yara-color.o yara-final.o + yara.o yara-ir.o yara-trans.o yara-insn.o yara-color.o yara-final.o \ + lower-subreg.o OBJS-md = $(out_object_file) @@ -2166,7 +2167,7 @@ function.o : function.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(OPTABS_H) libfuncs.h $(REGS_H) hard-reg-set.h insn-config.h $(RECOG_H) \ output.h toplev.h except.h $(HASHTAB_H) $(GGC_H) $(TM_P_H) langhooks.h \ gt-function.h $(TARGET_H) $(BASIC_BLOCK_H) $(INTEGRATE_H) $(PREDICT_H) \ - tree-pass.h + tree-pass.h $(CGRAPH_H) stmt.o : stmt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(TREE_H) $(FLAGS_H) $(FUNCTION_H) insn-config.h hard-reg-set.h $(EXPR_H) \ libfuncs.h except.h $(RECOG_H) toplev.h output.h $(GGC_H) $(TM_P_H) \ @@ -2507,12 +2508,13 @@ yara.o: yara.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(REGS_H) hard-reg-set.h $(FLAGS_H) $(OBSTACK_H) $(HASHTAB_H) errors.h \ $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) \ $(CFGLOOP_H) yara.h yara-int.h $(TIMEVAR_H) tree-pass.h output.h \ - integrate.h $(GGC_H) + integrate.h $(FUNCTION_H) $(CGRAPH_H) $(GGC_H) yara-ir.o: yara-ir.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TARGET_H) $(RTL_H) $(TREE_H) insn-codes.h insn-config.h \ 
$(OPTABS_H) $(RECOG_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) $(HASHTAB_H) \ - errors.h $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) langhooks.h\ - $(CFGLOOP_H) yara.h yara-int.h $(GGC_H) gt-yara-ir.h + errors.h $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) langhooks.h \ + $(CFGLOOP_H) yara.h yara-int.h $(FUNCTION_H) $(CGRAPH_H) $(GGC_H) \ + gt-yara-ir.h yara-trans.o: yara-trans.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TARGET_H) $(RTL_H) insn-codes.h insn-config.h $(OPTABS_H) $(RECOG_H) \ $(REGS_H) hard-reg-set.h $(FLAGS_H) $(HASHTAB_H) errors.h \ @@ -2627,6 +2629,8 @@ hooks.o: hooks.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(HOOKS_H) pretty-print.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h intl.h $(PRETTY_PRINT_H) \ $(TREE_H) errors.o : errors.c $(CONFIG_H) $(SYSTEM_H) errors.h $(BCONFIG_H) +lower-subreg.o : lower-subreg.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ + $(MACHMODE_H) $(RTL_H) bitmap.h $(out_object_file): $(out_file) $(CONFIG_H) coretypes.h $(TM_H) $(TREE_H) \ $(RTL_H) $(REGS_H) hard-reg-set.h insn-config.h conditions.h \ diff --git a/gcc/caller-save.c b/gcc/caller-save.c index ea85044cfec..be70e0846a5 100644 --- a/gcc/caller-save.c +++ b/gcc/caller-save.c @@ -409,7 +409,7 @@ save_call_clobbered_regs (void) if (code == CALL_INSN && ! 
find_reg_note (insn, REG_NORETURN, NULL)) { unsigned regno; - HARD_REG_SET hard_regs_to_save; + HARD_REG_SET hard_regs_to_save, used_regs; reg_set_iterator rsi; /* Use the register life information in CHAIN to compute which @@ -459,7 +459,8 @@ save_call_clobbered_regs (void) AND_COMPL_HARD_REG_SET (hard_regs_to_save, call_fixed_reg_set); AND_COMPL_HARD_REG_SET (hard_regs_to_save, this_insn_sets); AND_COMPL_HARD_REG_SET (hard_regs_to_save, hard_regs_saved); - AND_HARD_REG_SET (hard_regs_to_save, call_used_reg_set); + get_call_invalidated_used_regs (insn, &used_regs, false); + AND_HARD_REG_SET (hard_regs_to_save, used_regs); for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 6a8ac08aeff..7331d182aa0 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -191,6 +191,7 @@ cgraph_create_node (void) node->global.estimated_growth = INT_MIN; cgraph_nodes = node; cgraph_n_nodes++; + COPY_HARD_REG_SET (node->function_used_regs, call_used_reg_set); return node; } diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 6e60f8c205e..394082068a2 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -168,6 +168,11 @@ struct cgraph_node GTY((chain_next ("%h.next"), chain_prev ("%h.previous"))) into clone before compiling so the function in original form can be inlined later. This pointer points to the clone. */ tree inline_decl; + + /* Call unsaved hard registers really used by the corresponding + function (including ones used by functions called by the + function). 
*/ + HARD_REG_SET function_used_regs; }; struct cgraph_edge GTY((chain_next ("%h.next_caller"), chain_prev ("%h.prev_caller"))) diff --git a/gcc/combine.c b/gcc/combine.c index 2ff106d684c..cdb145ba9d2 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -11003,8 +11003,11 @@ record_dead_and_set_regs (rtx insn) if (CALL_P (insn)) { + HARD_REG_SET clobbered_regs; + + get_call_invalidated_used_regs (insn, &clobbered_regs, true); for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)) + if (TEST_HARD_REG_BIT (clobbered_regs, i)) { reg_stat[i].last_set_value = 0; reg_stat[i].last_set_mode = 0; diff --git a/gcc/common.opt b/gcc/common.opt index 7dc5caf9a12..d8c8ad2d010 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -539,6 +539,10 @@ fipa-type-escape Common Report Var(flag_ipa_type_escape) Init(0) Type based escape and alias analysis +fipra +Common Report Var(flag_ipra) Init(0) +Inter-procedural register allocation for YARA + fivopts Common Report Var(flag_ivopts) Init(1) Optimize induction variables on trees @@ -563,6 +567,10 @@ floop-optimize Common Does nothing. Preserved for backward compatibility. +flower-subreg +Common Report Var(flag_lower_subreg) +Subreg lowering + fmath-errno Common Report Var(flag_errno_math) Init(1) Set errno after built-in math functions diff --git a/gcc/cse.c b/gcc/cse.c index 53b26d039b9..18569f680fc 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -592,7 +592,7 @@ static void remove_invalid_subreg_refs (unsigned int, unsigned int, enum machine_mode); static void rehash_using_reg (rtx); static void invalidate_memory (void); -static void invalidate_for_call (void); +static void invalidate_for_call (rtx); static rtx use_related_value (rtx, struct table_elt *); static inline unsigned canon_hash (rtx, enum machine_mode); @@ -2003,21 +2003,23 @@ rehash_using_reg (rtx x) register. Also update their TICK values. 
*/ static void -invalidate_for_call (void) +invalidate_for_call (rtx call_insn) { unsigned int regno, endregno; unsigned int i; unsigned hash; struct table_elt *p, *next; int in_table = 0; - + HARD_REG_SET clobbered_regs; + /* Go through all the hard registers. For each that is clobbered in a CALL_INSN, remove the register from quantity chains and update reg_tick if defined. Also see if any of these registers is currently in the table. */ + get_call_invalidated_used_regs (call_insn, &clobbered_regs, true); for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) + if (TEST_HARD_REG_BIT (clobbered_regs, regno)) { delete_reg_equiv (regno); if (REG_TICK (regno) >= 0) @@ -2047,7 +2049,7 @@ invalidate_for_call (void) endregno = regno + hard_regno_nregs[regno][GET_MODE (p->exp)]; for (i = regno; i < endregno; i++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)) + if (TEST_HARD_REG_BIT (clobbered_regs, i)) { remove_from_table (p, hash); break; @@ -5995,7 +5997,7 @@ cse_insn (rtx insn, rtx libcall_insn) { if (! CONST_OR_PURE_CALL_P (insn)) invalidate_memory (); - invalidate_for_call (); + invalidate_for_call (insn); } /* Now invalidate everything set by this instruction. @@ -6581,7 +6583,7 @@ invalidate_skipped_block (rtx start) { if (! CONST_OR_PURE_CALL_P (insn)) invalidate_memory (); - invalidate_for_call (); + invalidate_for_call (insn); } invalidate_from_clobbers (PATTERN (insn)); diff --git a/gcc/cselib.c b/gcc/cselib.c index 0fc8aeef2cf..abd33a616b9 100644 --- a/gcc/cselib.c +++ b/gcc/cselib.c @@ -1404,8 +1404,11 @@ cselib_process_insn (rtx insn) memory. 
*/ if (CALL_P (insn)) { + HARD_REG_SET used_regs; + + get_call_invalidated_used_regs (insn, &used_regs, false); for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (call_used_regs[i] + if (TEST_HARD_REG_BIT (used_regs, i) || (REG_VALUES (i) && REG_VALUES (i)->elt && HARD_REGNO_CALL_PART_CLOBBERED (i, GET_MODE (REG_VALUES (i)->elt->u.val_rtx)))) diff --git a/gcc/df-scan.c b/gcc/df-scan.c index 1e1ed269606..f08af7f3e3a 100644 --- a/gcc/df-scan.c +++ b/gcc/df-scan.c @@ -1494,6 +1494,8 @@ df_insn_refs_record (struct dataflow *dflow, basic_block bb, rtx insn) { bitmap_iterator bi; unsigned int ui; + HARD_REG_SET clobbered_regs; + /* Calls may also reference any of the global registers, so they are recorded as used. */ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) @@ -1501,9 +1503,13 @@ df_insn_refs_record (struct dataflow *dflow, basic_block bb, rtx insn) df_uses_record (dflow, &regno_reg_rtx[i], DF_REF_REG_USE, bb, insn, 0); + get_call_invalidated_used_regs (insn, &clobbered_regs, true); EXECUTE_IF_SET_IN_BITMAP (df_invalidated_by_call, 0, ui, bi) - df_ref_record (dflow, regno_reg_rtx[ui], &regno_reg_rtx[ui], bb, insn, - DF_REF_REG_DEF, DF_REF_CLOBBER, false); + if (ui < FIRST_PSEUDO_REGISTER + && TEST_HARD_REG_BIT (clobbered_regs, ui)) + df_ref_record (dflow, regno_reg_rtx[ui], &regno_reg_rtx[ui], + bb, insn, + DF_REF_REG_DEF, DF_REF_CLOBBER, false); } } diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c index 538bdd4f48d..f026dbb9688 100644 --- a/gcc/dwarf2out.c +++ b/gcc/dwarf2out.c @@ -4089,6 +4089,7 @@ static dw_loc_descr_ref based_loc_descr (rtx, HOST_WIDE_INT); static int is_based_loc (rtx); static dw_loc_descr_ref mem_loc_descriptor (rtx, enum machine_mode mode); static dw_loc_descr_ref concat_loc_descriptor (rtx, rtx); +static dw_loc_descr_ref concatn_loc_descriptor (rtx); static dw_loc_descr_ref loc_descriptor (rtx); static dw_loc_descr_ref loc_descriptor_from_tree_1 (tree, int); static dw_loc_descr_ref loc_descriptor_from_tree (tree); @@ -8940,6 +8941,31 @@ 
concat_loc_descriptor (rtx x0, rtx x1) return cc_loc_result; } +/* Return a descriptor that describes the concatenation of N locations. */ + +static dw_loc_descr_ref +concatn_loc_descriptor (rtx concatn) +{ + dw_loc_descr_ref cc_loc_result = NULL; + unsigned int i, n = XVECLEN (concatn, 0); + + for (i = 0; i < n; ++i) + { + dw_loc_descr_ref ref; + rtx x = XVECEXP (concatn, 0, i); + + ref = loc_descriptor (x); + if (ref == NULL) + return NULL; + + add_loc_descr (&cc_loc_result, ref); + ref = new_loc_descr (DW_OP_piece, GET_MODE_SIZE (GET_MODE (x)), 0); + add_loc_descr (&cc_loc_result, ref); + } + + return cc_loc_result; +} + /* Output a proper Dwarf location descriptor for a variable or parameter which is either allocated in a register or in a memory location. For a register, we just generate an OP_REG and the register number. For a @@ -8977,6 +9003,10 @@ loc_descriptor (rtx rtl) loc_result = concat_loc_descriptor (XEXP (rtl, 0), XEXP (rtl, 1)); break; + case CONCATN: + loc_result = concatn_loc_descriptor (rtl); + break; + case VAR_LOCATION: /* Single part. */ if (GET_CODE (XEXP (rtl, 1)) != PARALLEL) diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c index 06721c6c467..477c5813084 100644 --- a/gcc/emit-rtl.c +++ b/gcc/emit-rtl.c @@ -812,13 +812,12 @@ gen_reg_rtx (enum machine_mode mode) return val; } -/* Generate a register with same attributes as REG, but offsetted by OFFSET. +/* Update NEW with same attributes as REG, but offsetted by OFFSET. Do the big endian correction if needed. 
*/ -rtx -gen_rtx_REG_offset (rtx reg, enum machine_mode mode, unsigned int regno, int offset) +static void +update_reg_offset (rtx new, rtx reg, int offset) { - rtx new = gen_rtx_REG (mode, regno); tree decl; HOST_WIDE_INT var_size; @@ -860,7 +859,7 @@ gen_rtx_REG_offset (rtx reg, enum machine_mode mode, unsigned int regno, int off if ((BYTES_BIG_ENDIAN || WORDS_BIG_ENDIAN) && decl != NULL && offset > 0 - && GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (mode) + && GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (GET_MODE (new)) && ((var_size = int_size_in_bytes (TREE_TYPE (decl))) > 0 && var_size < GET_MODE_SIZE (GET_MODE (reg)))) { @@ -904,6 +903,27 @@ gen_rtx_REG_offset (rtx reg, enum machine_mode mode, unsigned int regno, int off REG_ATTRS (new) = get_reg_attrs (REG_EXPR (reg), REG_OFFSET (reg) + offset); +} + +/* Generate a register with same attributes as REG, but offsetted by OFFSET. */ + +rtx +gen_rtx_REG_offset (rtx reg, enum machine_mode mode, + unsigned int regno, int offset) +{ + rtx new = gen_rtx_REG (mode, regno); + update_reg_offset (new, reg, offset); + return new; +} + +/* Generate a new pseudo register with same attributes as REG, but + offsetted by OFFSET. */ + +rtx +gen_reg_rtx_offset (rtx reg, enum machine_mode mode, int offset) +{ + rtx new = gen_reg_rtx (mode); + update_reg_offset (new, reg, offset); return new; } @@ -1153,8 +1173,9 @@ gen_lowpart_common (enum machine_mode mode, rtx x) return gen_rtx_fmt_e (GET_CODE (x), mode, XEXP (x, 0)); } else if (GET_CODE (x) == SUBREG || REG_P (x) - || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR - || GET_CODE (x) == CONST_DOUBLE || GET_CODE (x) == CONST_INT) + || GET_CODE (x) == CONCAT || GET_CODE (x) == CONCATN + || GET_CODE (x) == CONST_VECTOR || GET_CODE (x) == CONST_DOUBLE + || GET_CODE (x) == CONST_INT) return simplify_gen_subreg (mode, x, innermode, offset); /* Otherwise, we can't do this. 
*/ diff --git a/gcc/final.c b/gcc/final.c index 4ccf305cb23..2a26cc4f775 100644 --- a/gcc/final.c +++ b/gcc/final.c @@ -3902,9 +3902,40 @@ debug_free_queue (void) static unsigned int rest_of_handle_final (void) { + int i; rtx x; const char *fnname; + struct cgraph_node *node; + gcc_assert (cfun->decl != NULL); + node = cgraph_node (cfun->decl); + if (node != NULL) + { + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (fixed_regs [i]) + SET_HARD_REG_BIT (cfun->emit->call_used_regs, i); + else if (call_used_regs [i] + && (regs_ever_live [i] +#ifdef STACK_REGS + || (i >= FIRST_STACK_REG && i <= LAST_STACK_REG) +#endif + )) + SET_HARD_REG_BIT (cfun->emit->call_used_regs, i); + COPY_HARD_REG_SET (node->function_used_regs, cfun->emit->call_used_regs); + if (dump_file != NULL) + { + GO_IF_HARD_REG_EQUAL (cfun->emit->call_used_regs, + call_used_reg_set, ok); + fprintf (dump_file, "unused unsaved registers: "); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (TEST_HARD_REG_BIT (call_used_reg_set, i) + && ! TEST_HARD_REG_BIT (cfun->emit->call_used_regs, i)) + fprintf (dump_file, "%s ", reg_names [i]); + fprintf (dump_file, "\n"); + ok: + ; + } + } /* Get the function's name, as described by its RTL. This may be different from the DECL_NAME name used in the source file. 
*/ diff --git a/gcc/flow.c b/gcc/flow.c index 64d40925429..f02f200ded8 100644 --- a/gcc/flow.c +++ b/gcc/flow.c @@ -1839,7 +1839,8 @@ propagate_one_insn (struct propagate_block_info *pbi, rtx insn) bool sibcall_p; rtx note, cond; int i; - + HARD_REG_SET clobbered_regs; + cond = NULL_RTX; if (GET_CODE (PATTERN (insn)) == COND_EXEC) cond = COND_EXEC_TEST (PATTERN (insn)); @@ -1869,8 +1870,9 @@ propagate_one_insn (struct propagate_block_info *pbi, rtx insn) sibcall_p = SIBLING_CALL_P (insn); live_at_end = EXIT_BLOCK_PTR->il.rtl->global_live_at_start; + get_call_invalidated_used_regs (insn, &clobbered_regs, true); for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i) + if (TEST_HARD_REG_BIT (clobbered_regs, i) && ! (sibcall_p && REGNO_REG_SET_P (live_at_end, i) && ! refers_to_regno_p (i, i+1, diff --git a/gcc/function.c b/gcc/function.c index 988d613c4d7..052d565320c 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -63,6 +63,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "tree-gimple.h" #include "tree-pass.h" #include "predict.h" +#include "cgraph.h" #ifndef LOCAL_ALIGNMENT #define LOCAL_ALIGNMENT(TYPE, ALIGNMENT) ALIGNMENT @@ -5577,6 +5578,82 @@ current_function_name (void) { return lang_hooks.decl_printable_name (cfun->decl, 2); } + + + +/* This recursive function finds and returns CALL expression in X. */ +static rtx +get_call (rtx x) +{ + int i; + rtx call_rtx; + const char *fmt; + enum rtx_code code = GET_CODE (x); + + /* Ignore registers in memory. 
*/ + if (code == CALL) + return x; + + fmt = GET_RTX_FORMAT (code); + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt [i] == 'e') + { + if ((call_rtx = get_call (XEXP (x, i))) != NULL_RTX) + return call_rtx; + } + else if (fmt [i] == 'E') + { + int j; + + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + if ((call_rtx = get_call (XVECEXP (x, i, j))) != NULL_RTX) + return call_rtx; + } + } + return NULL_RTX; +} + +/* This function returns call unsaved registers invalidated (if + CLOBBERED_P) or used by function called by INSN through REGS. */ +void +get_call_invalidated_used_regs (rtx insn, HARD_REG_SET *regs, bool clobbered_p) +{ + rtx x; + struct cgraph_node *node; + tree decl = NULL; + + gcc_assert (CALL_P (insn)); + + x = get_call (PATTERN (insn)); + if (x != NULL_RTX) + { + x = XEXP (x, 0); + gcc_assert (GET_CODE (x) == MEM); + x = XEXP (x, 0); + if (GET_CODE (x) == SYMBOL_REF) + decl = SYMBOL_REF_DECL (x); + } + node = decl == NULL ? NULL : cgraph_node (decl); + if (! flag_ipra || node == NULL + /* This is a call of the function itself. We don't know used + register yet. So take the worst case. */ + || node->decl == cfun->decl) + { + if (clobbered_p) + COPY_HARD_REG_SET (*regs, regs_invalidated_by_call); + else + COPY_HARD_REG_SET (*regs, call_used_reg_set); + } + else + { + COPY_HARD_REG_SET (*regs, node->function_used_regs); + if (clobbered_p) + AND_HARD_REG_SET (*regs, regs_invalidated_by_call); + } +} + + static unsigned int diff --git a/gcc/function.h b/gcc/function.h index 1b2484e4522..4d43444876b 100644 --- a/gcc/function.h +++ b/gcc/function.h @@ -23,6 +23,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #define GCC_FUNCTION_H #include "tree.h" +#include "hard-reg-set.h" struct var_refs_queue GTY(()) { @@ -101,6 +102,10 @@ struct emit_status GTY(()) /* Indexed by pseudo register number, gives the rtx for that pseudo. Allocated in parallel with regno_pointer_align. 
*/ rtx * GTY ((length ("%h.x_reg_rtx_no"))) x_regno_reg_rtx; + + /* Call unsaved hard registers really used by given function + (including ones used by functions called by given function). */ + HARD_REG_SET call_used_regs; }; /* For backward compatibility... eventually these should all go away. */ @@ -559,6 +564,8 @@ extern rtx get_arg_pointer_save_area (struct function *); /* Returns the name of the current function. */ extern const char *current_function_name (void); +extern void get_call_invalidated_used_regs (rtx, HARD_REG_SET *, bool); + extern void do_warn_unused_parameter (tree); extern bool pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode, diff --git a/gcc/gcse.c b/gcc/gcse.c index f0e25a214ea..c4b8441591a 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -2063,8 +2063,11 @@ compute_hash_table_work (struct hash_table *table) if (CALL_P (insn)) { + HARD_REG_SET clobbered_regs; + + get_call_invalidated_used_regs (insn, &clobbered_regs, true); for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) + if (TEST_HARD_REG_BIT (clobbered_regs, regno)) record_last_reg_set_info (insn, regno); mark_call (insn); @@ -5762,8 +5765,11 @@ compute_store_table (void) if (CALL_P (insn)) { + HARD_REG_SET clobbered_regs; + + get_call_invalidated_used_regs (insn, &clobbered_regs, true); for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) + if (TEST_HARD_REG_BIT (clobbered_regs, regno)) { last_set_in[regno] = INSN_UID (insn); SET_BIT (reg_set_in_block[bb->index], regno); @@ -5785,8 +5791,11 @@ compute_store_table (void) if (CALL_P (insn)) { + HARD_REG_SET clobbered_regs; + + get_call_invalidated_used_regs (insn, &clobbered_regs, true); for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) + if (TEST_HARD_REG_BIT (clobbered_regs, regno)) already_set[regno] = 1; } @@ -5801,8 +5810,11 @@ 
compute_store_table (void) note_stores (pat, reg_clear_last_set, last_set_in); if (CALL_P (insn)) { + HARD_REG_SET clobbered_regs; + + get_call_invalidated_used_regs (insn, &clobbered_regs, true); for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno) + if (TEST_HARD_REG_BIT (clobbered_regs, regno) && last_set_in[regno] == INSN_UID (insn)) last_set_in[regno] = 0; } diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c index e234fd93b79..9cc58e81e20 100644 --- a/gcc/loop-iv.c +++ b/gcc/loop-iv.c @@ -1404,10 +1404,12 @@ simplify_using_assignment (rtx insn, rtx *expr, regset altered) if (CALL_P (insn)) { int i; + HARD_REG_SET clobbered_regs; /* Kill all call clobbered registers. */ + get_call_invalidated_used_regs (insn, &clobbered_regs, true); for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)) + if (TEST_HARD_REG_BIT (clobbered_regs, i)) SET_REGNO_REG_SET (altered, i); } diff --git a/gcc/lower-subreg.c b/gcc/lower-subreg.c new file mode 100644 index 00000000000..185f4674c9f --- /dev/null +++ b/gcc/lower-subreg.c @@ -0,0 +1,672 @@ +/* Decompose multiword subregs. + Contributed by Richard Henderson. + Copyright (C) 2005, 2006 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. 
*/ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "machmode.h" +#include "tm.h" +#include "rtl.h" +#include "function.h" +#include "expr.h" +#include "obstack.h" +#include "bitmap.h" +#include "tree-pass.h" +#include "timevar.h" +#include "tm_p.h" + + +#ifdef STACK_GROWS_DOWNWARD +# undef STACK_GROWS_DOWNWARD +# define STACK_GROWS_DOWNWARD 1 +#else +# define STACK_GROWS_DOWNWARD 0 +#endif + + +DEF_VEC_P(bitmap); +DEF_VEC_ALLOC_P(bitmap,heap); + +/* Bit N set if regno N is used in a context in which we can decompose it. */ +static bitmap decomposable_context; + +/* Bit N set if regno N is used in a context in which it cannot + be decomposed. */ +static bitmap non_decomposable_context; + +/* Bit N in element M set if there exists a copy from reg M to reg N. */ +static VEC(bitmap,heap) *reg_copy_graph; + + +/* Return true if INSN is a single set between two objects. Such insns + can always be decomposed. */ + +static rtx +simple_move (rtx insn) +{ + rtx x, set = single_set (insn); + + if (!set) + return NULL; + + x = SET_DEST (set); + if (!OBJECT_P (x) && GET_CODE (x) != SUBREG) + return NULL; + if (MEM_P (x) && MEM_VOLATILE_P (x)) + return NULL; + + x = SET_SRC (set); + if (!OBJECT_P (x) && GET_CODE (x) != SUBREG) + return NULL; + if (MEM_P (x) && MEM_VOLATILE_P (x)) + return NULL; + + return set; +} + +/* */ + +static void +find_pseudo_copy (rtx set) +{ + rtx dst = SET_DEST (set); + rtx src = SET_SRC (set); + unsigned int rd, rs; + bitmap b; + + if (!REG_P (dst) || !REG_P (src)) + return; + + rd = REGNO (dst); + rs = REGNO (src); + if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs)) + return; + + if (GET_MODE_SIZE (GET_MODE (dst)) < UNITS_PER_WORD) + return; + + b = VEC_index (bitmap, reg_copy_graph, rs); + if (b == NULL) + { + b = BITMAP_ALLOC (NULL); + VEC_replace (bitmap, reg_copy_graph, rs, b); + } + bitmap_set_bit (b, rd); +} + +/* */ + +static void +propagate_pseudo_copies (void) +{ + bitmap queue, propagate; + + queue = 
BITMAP_ALLOC (NULL); + propagate = BITMAP_ALLOC (NULL); + + bitmap_copy (queue, decomposable_context); + do + { + bitmap_iterator iter; + unsigned int i; + + bitmap_clear (propagate); + EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter) + { + bitmap b = VEC_index (bitmap, reg_copy_graph, i); + if (b) + bitmap_ior_and_compl_into (propagate, b, non_decomposable_context); + } + + bitmap_and_compl (queue, propagate, decomposable_context); + bitmap_ior_into (decomposable_context, propagate); + } + while (!bitmap_empty_p (queue)); + + BITMAP_FREE (queue); + BITMAP_FREE (propagate); +} + +/* Called via for_each_rtx. Examine the given expression and set bits as + appropriate in decomposable_context and non_decomposable_context. SM + is the result of simple_move for the complete insn. */ + +static int +find_decomposable_subregs (rtx *px, void *sm) +{ + rtx x = *px, inner; + unsigned int inner_size, outer_size; + unsigned int inner_words, outer_words; + unsigned int regno; + + switch (GET_CODE (x)) + { + case SUBREG: + /* Ensure we're not looking at something other than a subreg of a + pseudo register. One might hope these tests never fail, since + that would indicate someone not using simplify_gen_subreg or some + related interface, but that no doubt happens all too often. */ + inner = SUBREG_REG (x); + if (!REG_P (inner)) + break; + + regno = REGNO (inner); + if (HARD_REGISTER_NUM_P (regno)) + return -1; + + /* Compute the number of words covered by the subreg and the reg. */ + outer_size = GET_MODE_SIZE (GET_MODE (x)); + inner_size = GET_MODE_SIZE (GET_MODE (inner)); + outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + /* If we've got a single-word subreg of a multi-word reg, then this + should be a candidate for decomposition. Return -1 so that we + don't iterate over the inner register and decide it is used in a + context we can't decompose. */ + /* ??? This doesn't allow e.g. 
DImode subregs of TImode values on + 32-bit targets. We'd need to record the way in which the pseudo + is used, and only decompose if all uses were with the same number + of pieces. Hopefully this doesn't happen with any frequency. */ + /* ??? This is a bald-faced assumption that the subreg is actually + inside an operand, and is thus replaceable. This might be false + if the target plays games with subregs in the patterns. Perhaps + a better approach is to mirror what regrename does wrt recognizing + the insn, iterating over the operands, smashing the operands out + and iterating over the resulting pattern. */ + if (outer_words == 1 && inner_words > 1) + { + bitmap_set_bit (decomposable_context, regno); + return -1; + } + break; + + case REG: + /* Since we see outer subregs and avoid iterating over inner registers + when we can handle the decomposition, that means that anywhere else + we come across the register must be a place we can't decompose it. + Avoid setting the bit for single-word pseudos to keep down the size + of the bitmap. */ + regno = REGNO (x); + if (!HARD_REGISTER_NUM_P (regno) && !sm + && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) + bitmap_set_bit (non_decomposable_context, regno); + break; + + default: + break; + } + + return 0; +} + +/* Decompose pseudo REGNO into word-sized components. We smash the REG + node in place. This ensures that (1) something goes wrong quickly if + we fail to find a place in which we ought to be performing some + replacement, and (2) the debug information inside the symbol table is + automatically kept up to date. 
*/ + +static void +decompose_register (unsigned int regno) +{ + unsigned int words; + rtx reg; + + reg = regno_reg_rtx[regno]; + regno_reg_rtx[regno] = NULL; + + words = GET_MODE_SIZE (GET_MODE (reg)); + words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + if (0 && words == 2) + { + PUT_CODE (reg, CONCAT); + XEXP (reg, 0) = gen_reg_rtx_offset (reg, word_mode, 0); + XEXP (reg, 1) = gen_reg_rtx_offset (reg, word_mode, UNITS_PER_WORD); + } + else + { + unsigned int i; + rtvec v; + + if (dump_file) + fprintf (dump_file, "; Splitting reg %u ->", REGNO (reg)); + + PUT_CODE (reg, CONCATN); + XVEC (reg, 0) = v = rtvec_alloc (words); + + for (i = 0; i < words; ++i) + RTVEC_ELT (v, i) + = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD); + + if (dump_file) + { + for (i = 0; i < words; ++i) + fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i))); + fputc ('\n', dump_file); + } + } +} + +static inline bool +resolve_reg_p (rtx x) +{ + return GET_CODE (x) == CONCAT || GET_CODE (x) == CONCATN; +} + +static bool +resolve_subreg_p (rtx x) +{ + if (GET_CODE (x) == SUBREG) + return resolve_reg_p (SUBREG_REG (x)); + return false; +} + +/* */ + +static int +resolve_subreg_use (rtx *px, void *data ATTRIBUTE_UNUSED) +{ + rtx x = *px; + + if (x == NULL) + return 0; + + /* If this is a (subreg (concat)) pattern, then it must be something that + we created via decompose_register. */ + if (resolve_subreg_p (x)) + { + /* This must be resolvable. 
*/ + *px = simplify_subreg (GET_MODE (x), SUBREG_REG (x), + GET_MODE (SUBREG_REG (x)), SUBREG_BYTE (x)); + gcc_assert (*px != NULL); + return -1; + } + + if (resolve_reg_p (x)) + return 1; + + return 0; +} + +/* */ + +static void +move_libcall_note (rtx old_start, rtx new_start) +{ + rtx note0, note1, end; + + note0 = find_reg_note (old_start, REG_LIBCALL, NULL); + if (note0 == NULL) + return; + + remove_note (old_start, note0); + end = XEXP (note0, 0); + note1 = find_reg_note (end, REG_RETVAL, NULL); + + XEXP (note0, 1) = REG_NOTES (new_start); + REG_NOTES (new_start) = note0; + XEXP (note1, 0) = new_start; +} + +/* */ + +static void +remove_retval_note (rtx insn1) +{ + rtx note, note0, insn0, note1, insn; + + note1 = find_reg_note (insn1, REG_RETVAL, NULL); + if (note1 == NULL) + return; + + insn0 = XEXP (note1, 0); + note0 = find_reg_note (insn0, REG_LIBCALL, NULL); + + remove_note (insn0, note0); + remove_note (insn1, note1); + + for (insn = insn0; insn != insn1; insn = NEXT_INSN (insn)) + while ((note = find_reg_note (insn, REG_NO_CONFLICT, NULL))) + remove_note (insn, note); +} + +/* */ + +static void +resolve_reg_notes (rtx insn) +{ + rtx *pnote, note; + + note = find_reg_equal_equiv_note (insn); + if (note && for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL)) + { + remove_note (insn, note); + remove_retval_note (insn); + } + + pnote = &REG_NOTES (insn); + while ((note = *pnote)) + { + bool delete = false; + + switch (REG_NOTE_KIND (note)) + { + case REG_NO_CONFLICT: + if (resolve_reg_p (XEXP (note, 0))) + delete = true; + break; + + default: + break; + } + + if (delete) + *pnote = XEXP (note, 1); + else + pnote = &XEXP (note, 1); + } +} + +/* */ + +static bool +cannot_decompose_p (rtx x) +{ + if (REG_P (x)) + { + unsigned int regno = REGNO (x); + if (HARD_REGISTER_NUM_P (regno)) + return !validate_subreg (word_mode, GET_MODE (x), x, UNITS_PER_WORD); + else + return bitmap_bit_p (non_decomposable_context, regno); + } + return false; +} + +/* */ + +static 
rtx +resolve_simple_move (rtx set, rtx insn) +{ + rtx dst, src, tmp; + bool rdp, rsp, sdp, ssp, delete; + unsigned int i, words; + enum machine_mode orig_mode; + + dst = SET_DEST (set); + src = SET_SRC (set); + orig_mode = GET_MODE (dst); + + sdp = ssp = false; + if (GET_CODE (dst) == SUBREG && GET_MODE_SIZE (orig_mode) > UNITS_PER_WORD) + sdp = rdp = resolve_reg_p (SUBREG_REG (dst)); + else + rdp = resolve_reg_p (dst); + + if (GET_CODE (src) == SUBREG && GET_MODE_SIZE (orig_mode) > UNITS_PER_WORD) + ssp = rsp = resolve_reg_p (SUBREG_REG (src)); + else + rsp = resolve_reg_p (src); + + if (!rdp && !rsp) + return insn; + + start_sequence (); + + delete = true; + + if (ssp) + { + tmp = SUBREG_REG (src); + orig_mode = GET_MODE (tmp); + dst = gen_reg_rtx (orig_mode); + SUBREG_REG (src) = dst; + src = tmp; + delete = false; + } + if (!rsp && cannot_decompose_p (src)) + { + tmp = gen_reg_rtx (orig_mode); + emit_move_insn (tmp, src); + src = tmp; + } + + if (sdp) + { + tmp = SUBREG_REG (dst); + orig_mode = GET_MODE (tmp); + SUBREG_REG (dst) = gen_reg_rtx (orig_mode); + emit_move_insn (dst, src); + src = SUBREG_REG (dst); + dst = tmp; + } + if (!rdp && cannot_decompose_p (dst)) + { + dst = gen_reg_rtx (orig_mode); + SET_SRC (set) = dst; + delete = false; + } + + words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + if (push_operand (dst, orig_mode)) + { + unsigned int j, jinc; + + gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0); + gcc_assert (GET_CODE (XEXP (dst, 0)) != PRE_MODIFY); + gcc_assert (GET_CODE (XEXP (dst, 0)) != POST_MODIFY); + + if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD) + j = 0, jinc = 1; + else + j = words - 1, jinc = -1; + + for (i = 0; i < words; i++, j += jinc) + { + tmp = copy_rtx (XEXP (dst, 0)); + tmp = adjust_automodify_address_nv (dst, word_mode, tmp, + j * UNITS_PER_WORD); + emit_move_insn (tmp, simplify_subreg (word_mode, src, orig_mode, + j * UNITS_PER_WORD)); + } + } + else + { + gcc_assert (!MEM_P (dst) + 
|| GET_RTX_CLASS (GET_CODE (XEXP (dst, 0))) != RTX_AUTOINC); + gcc_assert (!MEM_P (src) + || GET_RTX_CLASS (GET_CODE (XEXP (src, 0))) != RTX_AUTOINC); + + if (REG_P (dst) && !HARD_REGISTER_NUM_P (REGNO (dst))) + emit_insn (gen_rtx_CLOBBER (VOIDmode, dst)); + + for (i = 0; i < words; ++i) + emit_move_insn (simplify_gen_subreg (word_mode, dst, orig_mode, + UNITS_PER_WORD * i), + simplify_gen_subreg (word_mode, src, orig_mode, + UNITS_PER_WORD * i)); + } + + tmp = get_insns (); + end_sequence (); + + emit_insn_before (tmp, insn); + if (delete) + { + move_libcall_note (insn, tmp); + remove_retval_note (insn); + delete_insn (insn); + } + + return tmp; +} + +static void +resolve_clobber (rtx pat, rtx insn) +{ + rtx reg = XEXP (pat, 0); + unsigned int words, i; + enum machine_mode orig_mode; + + if (!resolve_reg_p (reg)) + return; + + orig_mode = GET_MODE (reg); + words = GET_MODE_SIZE (orig_mode); + words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + + XEXP (pat, 0) = simplify_subreg (word_mode, reg, orig_mode, 0); + for (i = words - 1; i > 0; --i) + { + pat = simplify_subreg (word_mode, reg, orig_mode, i * UNITS_PER_WORD); + pat = gen_rtx_CLOBBER (VOIDmode, pat); + emit_insn_after (pat, insn); + } +} + +static void +resolve_use (rtx pat, rtx insn) +{ + if (resolve_subreg_p (XEXP (pat, 0))) + delete_insn (insn); +} + +/* */ + +void +decompose_multiword_subregs (void) +{ + rtx insn, set; + + decomposable_context = BITMAP_ALLOC (NULL); + non_decomposable_context = BITMAP_ALLOC (NULL); + + { + unsigned int max = max_reg_num (); + reg_copy_graph = VEC_alloc (bitmap, heap, max); + VEC_safe_grow (bitmap, heap, reg_copy_graph, max); + memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max); + } + + for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + { + if (GET_CODE (PATTERN (insn)) == CLOBBER + || GET_CODE (PATTERN (insn)) == USE) + continue; + + set = simple_move (insn); + if (set) + { + /* (set cc0 reg) is a comparison 
instruction and cannot be + decomposed. Clear SET so that we recognize this fact when + we see it in find_decomposable_subregs. */ + if (CC0_P (SET_DEST (set))) + set = NULL; + else + find_pseudo_copy (set); + } + for_each_rtx (&PATTERN (insn), find_decomposable_subregs, set); + } + + bitmap_and_compl_into (decomposable_context, non_decomposable_context); + if (!bitmap_empty_p (decomposable_context)) + { + bitmap_iterator iter; + unsigned int regno; + + propagate_pseudo_copies (); + + EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter) + decompose_register (regno); + + for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) + { + rtx pat; + + if (!INSN_P (insn)) + continue; + + pat = PATTERN (insn); + if (GET_CODE (pat) == CLOBBER) + resolve_clobber (pat, insn); + else if (GET_CODE (pat) == USE) + resolve_use (pat, insn); + else + { + set = simple_move (insn); + if (set) + insn = resolve_simple_move (set, insn); + for_each_rtx (&PATTERN (insn), resolve_subreg_use, NULL); + resolve_reg_notes (insn); + } + } + } + + BITMAP_FREE (decomposable_context); + BITMAP_FREE (non_decomposable_context); + + { + unsigned int i; + bitmap b; + for (i = 0; VEC_iterate (bitmap, reg_copy_graph, i, b); ++i) + if (b) + BITMAP_FREE (b); + } + VEC_free (bitmap, heap, reg_copy_graph); +} + + + +static bool +gate_lower_subreg (void) +{ + return flag_lower_subreg; +} + +/* Run the multiword subreg decomposition pass. 
*/ +static unsigned int +rest_of_handle_lower_subreg (void) +{ + decompose_multiword_subregs (); + return 0; +} + +struct tree_opt_pass pass_lower_subreg = +{ + "lower_subreg", /* name */ + gate_lower_subreg, /* gate */ + rest_of_handle_lower_subreg, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_LOWER_SUBREG, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | + TODO_ggc_collect, /* todo_flags_finish */ + 'Y' /* letter */ +}; diff --git a/gcc/opts.c b/gcc/opts.c index 00b9716a607..782e21ff4ce 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -564,6 +564,8 @@ decode_options (unsigned int argc, const char **argv) flag_gcse = 1; flag_expensive_optimizations = 1; flag_ipa_type_escape = 1; + flag_ipra = 1; + flag_lower_subreg = 1; flag_rerun_cse_after_loop = 1; flag_caller_saves = 1; flag_peephole2 = 1; diff --git a/gcc/passes.c b/gcc/passes.c index 1a321ad19c8..f91bc074910 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -632,6 +632,7 @@ init_optimization_passes (void) NEXT_PASS (pass_unshare_all_rtl); NEXT_PASS (pass_instantiate_virtual_regs); NEXT_PASS (pass_jump2); + NEXT_PASS (pass_lower_subreg); NEXT_PASS (pass_cse); NEXT_PASS (pass_gcse); NEXT_PASS (pass_jump_bypass); diff --git a/gcc/postreload-gcse.c b/gcc/postreload-gcse.c index 76b7b8c9ae0..dcdd8d2d6b0 100644 --- a/gcc/postreload-gcse.c +++ b/gcc/postreload-gcse.c @@ -718,11 +718,14 @@ record_opr_changes (rtx insn) if (CALL_P (insn)) { unsigned int regno; + HARD_REG_SET clobbered_regs; + + get_call_invalidated_used_regs (insn, &clobbered_regs, true); for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) + if (TEST_HARD_REG_BIT (clobbered_regs, regno)) record_last_reg_set_info (insn, regno); - + if (! 
CONST_OR_PURE_CALL_P (insn)) record_last_mem_set_info (insn); } @@ -879,9 +882,15 @@ reg_set_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn) { if (set_of (reg, insn) != NULL_RTX) return insn; - if ((CALL_P (insn) - && call_used_regs[REGNO (reg)]) - || find_reg_fusage (insn, CLOBBER, reg)) + if (CALL_P (insn)) + { + HARD_REG_SET used_regs; + + get_call_invalidated_used_regs (insn, &used_regs, false); + if (TEST_HARD_REG_BIT (used_regs, REGNO (reg))) + return insn; + } + if (find_reg_fusage (insn, CLOBBER, reg)) return insn; if (FIND_REG_INC_NOTE (insn, reg)) @@ -911,13 +920,20 @@ reg_used_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn) insn = NEXT_INSN (insn)) if (INSN_P (insn)) { - if (reg_overlap_mentioned_p (reg, PATTERN (insn)) - || (CALL_P (insn) - && call_used_regs[REGNO (reg)]) - || find_reg_fusage (insn, USE, reg) + if (reg_overlap_mentioned_p (reg, PATTERN (insn))) + return insn; + if (CALL_P (insn)) + { + HARD_REG_SET used_regs; + + get_call_invalidated_used_regs (insn, &used_regs, false); + if (TEST_HARD_REG_BIT (used_regs, REGNO (reg))) + return insn; + } + if (find_reg_fusage (insn, USE, reg) || find_reg_fusage (insn, CLOBBER, reg)) return insn; - + if (FIND_REG_INC_NOTE (insn, reg)) return insn; } diff --git a/gcc/postreload.c b/gcc/postreload.c index 5f4ae4f5b18..b5efbbbf282 100644 --- a/gcc/postreload.c +++ b/gcc/postreload.c @@ -917,9 +917,11 @@ reload_combine (void) if (CALL_P (insn)) { rtx link; + HARD_REG_SET used_regs; + get_call_invalidated_used_regs (insn, &used_regs, false); for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) - if (call_used_regs[r]) + if (TEST_HARD_REG_BIT (used_regs, r)) { reg_state[r].use_index = RELOAD_COMBINE_MAX_USES; reg_state[r].store_ruid = reload_combine_ruid; @@ -1411,9 +1413,12 @@ reload_cse_move2add (rtx first) unknown values. 
*/ if (CALL_P (insn)) { + HARD_REG_SET used_regs; + + get_call_invalidated_used_regs (insn, &used_regs, false); for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--) { - if (call_used_regs[i]) + if (TEST_HARD_REG_BIT (used_regs, i)) /* Reset the information about this register. */ reg_set_luid[i] = 0; } diff --git a/gcc/recog.c b/gcc/recog.c index 82cacfed50d..19644e5500f 100644 --- a/gcc/recog.c +++ b/gcc/recog.c @@ -2971,7 +2971,10 @@ peep2_find_free_register (int from, int to, const char *class_str, if (success) { for (j = hard_regno_nregs[regno][mode] - 1; j >= 0; j--) - SET_HARD_REG_BIT (*reg_set, regno + j); + { + regs_ever_live [regno + j] = 1; + SET_HARD_REG_BIT (*reg_set, regno + j); + } /* Start the next search with the next register. */ if (++raw_regno >= FIRST_PSEUDO_REGISTER) diff --git a/gcc/regrename.c b/gcc/regrename.c index 49e18c4a7c2..b225fda16e0 100644 --- a/gcc/regrename.c +++ b/gcc/regrename.c @@ -1767,9 +1767,14 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) did_replacement: /* Clobber call-clobbered registers. */ if (CALL_P (insn)) - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)) - kill_value_regno (i, 1, vd); + { + HARD_REG_SET clobbered_regs; + + get_call_invalidated_used_regs (insn, &clobbered_regs, true); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (TEST_HARD_REG_BIT (clobbered_regs, i)) + kill_value_regno (i, 1, vd); + } /* Notice stores. 
*/ note_stores (PATTERN (insn), kill_set_value, vd); diff --git a/gcc/resource.c b/gcc/resource.c index cd4eb10628c..215ae5a49e5 100644 --- a/gcc/resource.c +++ b/gcc/resource.c @@ -662,10 +662,12 @@ mark_set_resources (rtx x, struct resources *res, int in_dest, if (mark_type == MARK_SRC_DEST_CALL) { rtx link; + HARD_REG_SET used_regs; + get_call_invalidated_used_regs (x, &used_regs, false); res->cc = res->memory = 1; for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) - if (call_used_regs[r] || global_regs[r]) + if (TEST_HARD_REG_BIT (used_regs, r) || global_regs[r]) SET_HARD_REG_BIT (res->regs, r); for (link = CALL_INSN_FUNCTION_USAGE (x); @@ -1024,11 +1026,13 @@ mark_target_live_regs (rtx insns, rtx target, struct resources *res) if (CALL_P (real_insn)) { + HARD_REG_SET clobbered_regs; + + get_call_invalidated_used_regs (insn, &clobbered_regs, true); /* CALL clobbers all call-used regs that aren't fixed except sp, ap, and fp. Do this before setting the result of the call live. */ - AND_COMPL_HARD_REG_SET (current_live_regs, - regs_invalidated_by_call); + AND_COMPL_HARD_REG_SET (current_live_regs, clobbered_regs); /* A CALL_INSN sets any global register live, since it may have been modified by the call. */ diff --git a/gcc/rtl.def b/gcc/rtl.def index 078f4af67ac..201b4ab0e3d 100644 --- a/gcc/rtl.def +++ b/gcc/rtl.def @@ -383,10 +383,10 @@ DEF_RTL_EXPR(SUBREG, "subreg", "ei", RTX_EXTRA) DEF_RTL_EXPR(STRICT_LOW_PART, "strict_low_part", "e", RTX_EXTRA) -/* (CONCAT a b) represents the virtual concatenation of a and b - to make a value that has as many bits as a and b put together. - This is used for complex values. Normally it appears only - in DECL_RTLs and during RTL generation, but not in the insn chain. */ +/* (CONCAT a b) represents the virtual concatenation of a and b to make a + value that has as many bits as a and b put together. This is used for, + among other things, complex values. 
Normally it appears only in DECL_RTLs + and during RTL generation, but not in the insn chain. */ DEF_RTL_EXPR(CONCAT, "concat", "ee", RTX_OBJ) /* A memory location; operand is the address. The second operand is the @@ -412,6 +412,13 @@ DEF_RTL_EXPR(SYMBOL_REF, "symbol_ref", "s00", RTX_CONST_OBJ) pretend to be looking at the entire value and comparing it. */ DEF_RTL_EXPR(CC0, "cc0", "", RTX_OBJ) +/* (CONCATN [a1 a2 .. an]) represents the virtual concatenation of all + An to make a value. This is an extension of the CONCAT to larger + numbers of components. This is used for decomposing large values + into register sized components. Like CONCAT, it should not appear + in the insn chain. */ +DEF_RTL_EXPR (CONCATN, "concatn", "E", RTX_OBJ) + /* ---------------------------------------------------------------------- Expressions for operators in an rtl pattern ---------------------------------------------------------------------- */ diff --git a/gcc/rtl.h b/gcc/rtl.h index a29c73a026d..75b5eabd23e 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -1477,6 +1477,7 @@ extern int rtx_equal_p (rtx, rtx); extern rtvec gen_rtvec_v (int, rtx *); extern rtx gen_reg_rtx (enum machine_mode); extern rtx gen_rtx_REG_offset (rtx, enum machine_mode, unsigned int, int); +extern rtx gen_reg_rtx_offset (rtx, enum machine_mode, int); extern rtx gen_label_rtx (void); extern rtx gen_lowpart_common (enum machine_mode, rtx); diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index 9292a4bb9bb..907af685eb5 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -737,16 +737,24 @@ reg_set_p (rtx reg, rtx insn) { /* We can be passed an insn or part of one. If we are passed an insn, check if a side-effect of the insn clobbers REG. 
*/ - if (INSN_P (insn) - && (FIND_REG_INC_NOTE (insn, reg) - || (CALL_P (insn) - && ((REG_P (reg) - && REGNO (reg) < FIRST_PSEUDO_REGISTER - && TEST_HARD_REG_BIT (regs_invalidated_by_call, - REGNO (reg))) - || MEM_P (reg) - || find_reg_fusage (insn, CLOBBER, reg))))) - return 1; + if (INSN_P (insn)) + { + if (FIND_REG_INC_NOTE (insn, reg)) + return 1; + if (CALL_P (insn)) + { + if (REG_P (reg) && REGNO (reg) < FIRST_PSEUDO_REGISTER) + { + HARD_REG_SET clobbered_regs; + + get_call_invalidated_used_regs (insn, &clobbered_regs, true); + if (TEST_HARD_REG_BIT (clobbered_regs, REGNO (reg))) + return 1; + } + if (MEM_P (reg) || find_reg_fusage (insn, CLOBBER, reg)) + return 1; + } + } return set_of (reg, insn) != NULL_RTX; } diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c index 33ee695a930..c43169a7aeb 100644 --- a/gcc/sched-deps.c +++ b/gcc/sched-deps.c @@ -1512,6 +1512,9 @@ sched_analyze (struct deps *deps, rtx head, rtx tail) } else { + HARD_REG_SET clobbered_regs; + + get_call_invalidated_used_regs (insn, &clobbered_regs, true); for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) /* A call may read and modify global register variables. */ if (global_regs[i]) @@ -1524,7 +1527,7 @@ sched_analyze (struct deps *deps, rtx head, rtx tail) and 'definitely not clobbered', we must include all partly call-clobbered registers here. 
*/ else if (HARD_REGNO_CALL_PART_CLOBBERED (i, reg_raw_mode[i]) - || TEST_HARD_REG_BIT (regs_invalidated_by_call, i)) + || TEST_HARD_REG_BIT (clobbered_regs, i)) SET_REGNO_REG_SET (reg_pending_clobbers, i); /* We don't know what set of fixed registers might be used by the function, but it is certain that the stack pointer diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index e00e9ccca10..902347fbeb9 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -4573,15 +4573,24 @@ simplify_subreg (enum machine_mode outermode, rtx op, && GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (GET_MODE (op))) return adjust_address_nv (op, outermode, byte); - /* Handle complex values represented as CONCAT - of real and imaginary part. */ - if (GET_CODE (op) == CONCAT) + /* Handle values represented as CONCAT. */ + if (GET_CODE (op) == CONCAT || GET_CODE (op) == CONCATN) { unsigned int inner_size, final_offset; rtx part, res; - inner_size = GET_MODE_UNIT_SIZE (innermode); - part = byte < inner_size ? XEXP (op, 0) : XEXP (op, 1); + if (GET_CODE (op) == CONCAT) + { + inner_size = GET_MODE_SIZE (innermode) / 2; + part = byte < inner_size ? XEXP (op, 0) : XEXP (op, 1); + } + else + { + /* ??? We've got room; perhaps we should store the inner size + of the CONCATN in one of the subsequent unused fields. 
*/ + inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0); + part = XVECEXP (op, 0, byte / inner_size); + } final_offset = byte % inner_size; if (final_offset + GET_MODE_SIZE (outermode) > inner_size) return NULL_RTX; diff --git a/gcc/timevar.def b/gcc/timevar.def index dd8276f82a7..a237ee42b66 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -125,6 +125,7 @@ DEFTIMEVAR (TV_OVERLOAD , "overload resolution") DEFTIMEVAR (TV_TEMPLATE_INSTANTIATION, "template instantiation") DEFTIMEVAR (TV_EXPAND , "expand") DEFTIMEVAR (TV_VARCONST , "varconst") +DEFTIMEVAR (TV_LOWER_SUBREG , "lower subreg") DEFTIMEVAR (TV_JUMP , "jump") DEFTIMEVAR (TV_CSE , "CSE") DEFTIMEVAR (TV_LOOP , "loop analysis") diff --git a/gcc/toplev.h b/gcc/toplev.h index 51f6694b74b..69fdc8c0627 100644 --- a/gcc/toplev.h +++ b/gcc/toplev.h @@ -122,6 +122,7 @@ extern int flag_crossjumping; extern int flag_if_conversion; extern int flag_if_conversion2; extern int flag_keep_static_consts; +extern int flag_lower_subreg; extern int flag_peel_loops; extern int flag_rerun_cse_after_loop; extern int flag_thread_jumps; @@ -132,6 +133,7 @@ extern int flag_unswitch_loops; extern int flag_cprop_registers; extern int time_report; extern int flag_yara; +extern int flag_ipra; extern int flag_optimistic_coalescing; extern int flag_extended_coalescing; extern int flag_relief; diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 22c6050ccd0..a9643c209e9 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -331,6 +331,7 @@ extern struct tree_opt_pass pass_initial_value_sets; extern struct tree_opt_pass pass_unshare_all_rtl; extern struct tree_opt_pass pass_instantiate_virtual_regs; extern struct tree_opt_pass pass_jump2; +extern struct tree_opt_pass pass_lower_subreg; extern struct tree_opt_pass pass_cse; extern struct tree_opt_pass pass_gcse; extern struct tree_opt_pass pass_jump_bypass; diff --git a/gcc/var-tracking.c b/gcc/var-tracking.c index 7e910d6bcba..8d482ff34b3 100644 --- a/gcc/var-tracking.c +++ 
b/gcc/var-tracking.c @@ -1583,9 +1583,15 @@ compute_bb_dataflow (basic_block bb) switch (VTI (bb)->mos[i].type) { case MO_CALL: - for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) - if (TEST_HARD_REG_BIT (call_used_reg_set, r)) - var_regno_delete (out, r); + { + HARD_REG_SET used_regs; + + get_call_invalidated_used_regs (VTI (bb)->mos[i].insn, + &used_regs, false); + for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) + if (TEST_HARD_REG_BIT (used_regs, r)) + var_regno_delete (out, r); + } break; case MO_USE: @@ -2345,9 +2351,11 @@ emit_notes_in_bb (basic_block bb) case MO_CALL: { int r; + HARD_REG_SET used_regs; + get_call_invalidated_used_regs (insn, &used_regs, false); for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) - if (TEST_HARD_REG_BIT (call_used_reg_set, r)) + if (TEST_HARD_REG_BIT (used_regs, r)) { var_regno_delete (&set, r); } diff --git a/gcc/yara-color.c b/gcc/yara-color.c index af99305e6ec..3093bbfa69d 100644 --- a/gcc/yara-color.c +++ b/gcc/yara-color.c @@ -1868,6 +1868,10 @@ static can_t *saved_conflict_cans; static bitmap conflict_can_bitmap; static bitmap biased_can_bitmap; +/* Array whose element value is true if the corresponding hard + register already allocated for a can. */ +static bool allocated_reg_p [FIRST_PSEUDO_REGISTER]; + /* Function choosing a hard register for CAN. 
*/ static bool choose_global_hard_reg (can_t can) @@ -1903,6 +1907,34 @@ choose_global_hard_reg (can_t can) best_hard_regno = -1; GO_IF_HARD_REG_SUBSET (reg_class_contents [cover_class], conflicting_regs, fail); + if (flag_ipra && call_p) + { + int freq; + allocno_t a, *can_allocnos; + HARD_REG_SET clobbered_regs; + + can_allocnos = CAN_ALLOCNOS (can); + for (i = 0; (a = can_allocnos [i]) != NULL; i++) + if (ALLOCNO_CALL_CROSS_P (a)) + { + freq = ALLOCNO_CALL_FREQ (a); + COPY_HARD_REG_SET (clobbered_regs, + ALLOCNO_CLOBBERED_REGS (a)); + for (j = (int) class_hard_regs_num [cover_class] - 1; + j >= 0; + j--) + { + hard_regno = class_hard_regs [cover_class] [j]; + if (TEST_HARD_REG_BIT (clobbered_regs, hard_regno)) + { + class = REGNO_REG_CLASS (hard_regno); + costs [j] + += freq * (memory_move_cost [mode] [class] [0] + + memory_move_cost [mode] [class] [1]); + } + } + } + } min_cost = INT_MAX; for (i = 0; i < class_size; i++) { @@ -1910,7 +1942,7 @@ choose_global_hard_reg (can_t can) if (hard_reg_not_in_set_p (hard_regno, mode, conflicting_regs)) { cost = costs [i]; - if (call_p + if (! flag_ipra && call_p && ! hard_reg_not_in_set_p (hard_regno, mode, call_used_reg_set)) { /* ??? If only part is call clobbered. */ @@ -1919,6 +1951,16 @@ choose_global_hard_reg (can_t can) * (memory_move_cost [mode] [class] [0] + memory_move_cost [mode] [class] [1])); } + if (! allocated_reg_p [hard_regno] + && hard_reg_not_in_set_p (hard_regno, mode, call_used_reg_set)) + /* We need to save/restore the register in + epilogue/prologue. Therefore we increase the cost. */ + { + /* ??? If only part is call clobbered. 
*/ + class = REGNO_REG_CLASS (hard_regno); + cost += (memory_move_cost [mode] [class] [0] + + memory_move_cost [mode] [class] [1]); + } if (min_cost > cost) { min_cost = cost; @@ -1961,6 +2003,7 @@ choose_global_hard_reg (can_t can) [best_hard_regno] [mode]); } } + allocated_reg_p [best_hard_regno] = true; } return best_hard_regno >= 0; } @@ -2214,7 +2257,8 @@ push_globals_to_stack (void) } delete_can_from_bucket (can, bucket_ptr); if (yara_dump_file != NULL) - fprintf (yara_dump_file, "Pushing %d (potential spill)\n", CAN_NUM (can)); + fprintf (yara_dump_file, "Pushing %d (potential spill)\n", + CAN_NUM (can)); } CAN_IN_GRAPH_P (can) = false; VARRAY_PUSH_GENERIC_PTR (global_stack_varray, can); @@ -2265,6 +2309,7 @@ pop_globals_from_stack (void) int stack_size; enum reg_class cover_class; + memset (allocated_reg_p, 0, sizeof (allocated_reg_p)); for (;;) { stack_size = VARRAY_ACTIVE_SIZE (global_stack_varray); @@ -2480,7 +2525,9 @@ assign_global_can_allocnos (void) if (hard_regno < 0 || (ALLOCNO_CALL_CROSS_P (a) && ! hard_reg_not_in_set_p (hard_regno, CAN_MODE (can), - call_used_reg_set))) + flag_ipra + ? ALLOCNO_CLOBBERED_REGS (a) + : call_used_reg_set))) { equiv_const = (ALLOCNO_REGNO (a) >= 0 ? reg_equiv_constant [ALLOCNO_REGNO (a)] diff --git a/gcc/yara-int.h b/gcc/yara-int.h index 4b0bfbbb4c0..ff297bdf7e1 100644 --- a/gcc/yara-int.h +++ b/gcc/yara-int.h @@ -342,6 +342,11 @@ struct allocno_common /* Frequency of calls which given allocno intersects. */ int call_freq; + /* The following member is defined if the allocno intersects a call. + Hard registers which can not used for given allocno because they + might be clobbered by calls inside of hard allocno live + range. */ + HARD_REG_SET clobbered_regs; /* Allocno attributes should be logged. 
*/ struct allocno_change change; @@ -484,6 +489,7 @@ union allocno_node #define ALLOCNO_HARD_REG_CONFLICTS(A) ((A)->common.hard_reg_conflicts) #define ALLOCNO_CALL_CROSS_P(A) ((A)->common.call_cross_p) #define ALLOCNO_CALL_FREQ(A) ((A)->common.call_freq) +#define ALLOCNO_CLOBBERED_REGS(A) ((A)->common.clobbered_regs) #define ALLOCNO_CHANGE(A) ((A)->common.change) #define ALLOCNO_HARD_REGNO(A) ((A)->common.change.hard_regno) #define ALLOCNO_HARD_REGSET(A) ((A)->common.change.hard_regset) @@ -728,6 +734,7 @@ struct can int slotno; /* Frequency of calls which given can intersects. */ int call_freq; + /* True if an allocno of the can lives through a call. */ bool call_p; /* True value means than the can was not removed from the @@ -738,8 +745,6 @@ struct can /* True value means that the can is global one, in other words it lives in more one BB. */ bool global_p; - /* True if we already spilled the can during local allocation. */ - bool spill_p; /* True if hard register or memory has been assigned to the can. */ bool assigned_p; /* Mode of allocnos belonging to the can. 
*/ @@ -774,7 +779,6 @@ struct can #define CAN_CALL_P(C) ((C)->call_p) #define CAN_IN_GRAPH_P(C) ((C)->in_graph_p) #define CAN_GLOBAL_P(C) ((C)->global_p) -#define CAN_SPILL_P(C) ((C)->spill_p) #define CAN_ASSIGNED_P(C) ((C)->assigned_p) #define CAN_MODE(C) ((C)->mode) #define CAN_COPIES(C) ((C)->can_copies) diff --git a/gcc/yara-ir.c b/gcc/yara-ir.c index 94db2fb348c..b79f29f5501 100644 --- a/gcc/yara-ir.c +++ b/gcc/yara-ir.c @@ -47,6 +47,8 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "params.h" #include "langhooks.h" #include "yara-int.h" +#include "cgraph.h" +#include "function.h" struct loops yara_loops; struct yara_loop_tree_node *yara_loop_tree_root; @@ -259,25 +261,12 @@ int reg_class_nregs [N_REG_CLASSES] [MAX_MACHINE_MODE]; static void setup_reg_class_nregs (void) { - int i, m, old, hard_regno; + int m; enum reg_class cl; - memset (reg_class_nregs, 0, sizeof (reg_class_nregs)); for (cl = 0; cl < N_REG_CLASSES; cl++) for (m = 0; m < MAX_MACHINE_MODE; m++) - { - for (i = 0; i < (int) class_hard_regs_num [cl]; i++) - { - hard_regno = class_hard_regs [cl] [i]; - old = reg_class_nregs [cl] [m]; - reg_class_nregs [cl] [m] - = HARD_REGNO_NREGS (hard_regno, (enum machine_mode) m); - if (old != 0 && old != reg_class_nregs [cl] [m]) - break; - } - if (i < (int) class_hard_regs_num [cl]) - reg_class_nregs [cl] [m] = -1; - } + reg_class_nregs [cl] [m] = CLASS_MAX_NREGS (cl, m); } /* ??? implement better class SImode instead of HImode for QImode. 
*/ @@ -780,12 +769,12 @@ contains_eliminable_reg (rtx x) fmt = GET_RTX_FORMAT (code); for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) { - if (fmt[i] == 'e') + if (fmt [i] == 'e') { if (contains_eliminable_reg (XEXP (x, i))) return true; } - else if (fmt[i] == 'E') + else if (fmt [i] == 'E') { int j; @@ -1450,6 +1439,7 @@ create_allocno (enum allocno_type type, int regno, enum machine_mode mode) #endif ALLOCNO_CALL_CROSS_P (a) = false; ALLOCNO_CALL_FREQ (a) = 0; + CLEAR_HARD_REG_SET (ALLOCNO_CLOBBERED_REGS (a)); return a; } @@ -1596,6 +1586,11 @@ print_allocno (FILE *f, allocno_t a) fprintf (f, ", + %d", ALLOCNO_MEMORY_SLOT_OFFSET (a)); if (ALLOCNO_USE_EQUIV_CONST_P (a)) fprintf (f, ",\nequiv const "); + if (ALLOCNO_CALL_CROSS_P (a)) + { + fprintf (f, "\n Call clobbered regs:"); + print_hard_reg_set (f, ALLOCNO_CLOBBERED_REGS (a)); + } fprintf (f, "\n Hard reg conflicts:"); print_hard_reg_set (f, ALLOCNO_HARD_REG_CONFLICTS (a)); fprintf (f, "\n Allocno conflicts:"); @@ -3113,6 +3108,10 @@ process_non_operand_hard_regs (rtx *loc, bool output_p) } } +/* Unsaved registers invalidated by function whose call is currently + being processed. */ +static HARD_REG_SET curr_call_used_function_regs; + /* The function sets up call_p and increment call_freq for allocno LIVE_A living through call insn given by DATA. The function is called by generic traverse function process_live_allocnos. 
*/ @@ -3124,6 +3123,8 @@ set_call_info (allocno_t live_a, void *data, ALLOCNO_CALL_CROSS_P (live_a) = true; ALLOCNO_CALL_FREQ (live_a) += BLOCK_FOR_INSN (insn)->frequency; + IOR_HARD_REG_SET (ALLOCNO_CLOBBERED_REGS (live_a), + curr_call_used_function_regs); } /* The function sets up hard registers conflicting with allocno @@ -3320,7 +3321,13 @@ build_insn_allocno_conflicts (rtx insn, op_set_t single_reg_op_set) mark_hard_reg_death (REGNO (XEXP (link, 0)), GET_MODE (XEXP (link, 0))); if (CALL_P (insn)) - process_live_allocnos (set_call_info, insn); + { + get_call_invalidated_used_regs (insn, &curr_call_used_function_regs, + false); + IOR_HARD_REG_SET (cfun->emit->call_used_regs, + curr_call_used_function_regs); + process_live_allocnos (set_call_info, insn); + } /* Set up allocnos: */ for (a = curr_insn_allocnos; a != NULL; a = INSN_ALLOCNO_NEXT (a)) @@ -4837,7 +4844,6 @@ create_can (void) CAN_CALL_FREQ (can) = 0; CAN_CALL_P (can) = false; CAN_GLOBAL_P (can) = false || (YARA_PARAMS & YARA_NO_LOCAL_CAN) == 0; - CAN_SPILL_P (can) = false; CAN_MODE (can) = VOIDmode; CAN_HARD_REG_COSTS (can) = NULL; CAN_COPIES (can) = NULL; @@ -5712,14 +5718,14 @@ print_can (FILE *f, can_t can) allocno_t a, *can_allocnos; fprintf - (f, "%scan#%d (%s %s(%d,%d) freq %d (call %d) r%d m%d %s%s%sconfl %d) allocnos:\n ", + (f, "%scan#%d (%s %s(%d,%d) freq %d (call %d) r%d m%d %s%sconfl %d) allocnos:\n ", (CAN_GLOBAL_P (can) ? "g" : ""), CAN_NUM (can), GET_MODE_NAME (CAN_MODE (can)), reg_class_names [CAN_COVER_CLASS (can)], CAN_COVER_CLASS_COST (can), CAN_MEMORY_COST (can), CAN_FREQ (can), CAN_CALL_FREQ (can), CAN_HARD_REGNO (can), CAN_SLOTNO (can), (CAN_CALL_P (can) ? "call " : ""), (CAN_IN_GRAPH_P (can) ? "in " : ""), - (CAN_SPILL_P (can) ? 
"spill " : ""), CAN_LEFT_CONFLICTS_NUM (can)); + CAN_LEFT_CONFLICTS_NUM (can)); can_allocnos = CAN_ALLOCNOS (can); for (i = 0; (a = can_allocnos [i]) != NULL; i++) { diff --git a/gcc/yara-trans.c b/gcc/yara-trans.c index 33bb658e7bf..f3c3a21dadd 100644 --- a/gcc/yara-trans.c +++ b/gcc/yara-trans.c @@ -2132,7 +2132,10 @@ check_hard_regno_for_a (allocno_t a, int hard_regno, return false; if (ALLOCNO_CALL_CROSS_P (a)) { - COPY_HARD_REG_SET (prohibited_hard_regs, call_used_reg_set); + if (flag_ipra) + COPY_HARD_REG_SET (prohibited_hard_regs, ALLOCNO_CLOBBERED_REGS (a)); + else + COPY_HARD_REG_SET (prohibited_hard_regs, call_used_reg_set); IOR_HARD_REG_SET (prohibited_hard_regs, ALLOCNO_HARD_REG_CONFLICTS (a)); } else @@ -2520,7 +2523,12 @@ assign_one_allocno (allocno_t a, enum reg_class cl, HARD_REG_SET possible_regs) ok: #endif if (ALLOCNO_CALL_CROSS_P (a)) - COPY_HARD_REG_SET (prohibited_hard_regs, call_used_reg_set); + { + if (flag_ipra) + COPY_HARD_REG_SET (prohibited_hard_regs, ALLOCNO_CLOBBERED_REGS (a)); + else + COPY_HARD_REG_SET (prohibited_hard_regs, call_used_reg_set); + } else CLEAR_HARD_REG_SET (prohibited_hard_regs); IOR_COMPL_HARD_REG_SET (prohibited_hard_regs, possible_regs); diff --git a/gcc/yara.c b/gcc/yara.c index 577a332dce1..81a31fc3d37 100644 --- a/gcc/yara.c +++ b/gcc/yara.c @@ -45,6 +45,8 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "output.h" #include "integrate.h" #include "yara-int.h" +#include "cgraph.h" +#include "function.h" FILE *yara_dump_file; int yara_max_uid; /* before the allocation */ @@ -419,6 +421,7 @@ yara (FILE *f) yara_dump_file = f; gcc_obstack_init (&yara_obstack); bitmap_obstack_initialize (&yara_bitmap_obstack); + CLEAR_HARD_REG_SET (cfun->emit->call_used_regs); yara_ir_init (); yara_trans_init (); yara_insn_init (); |