author     Vladimir Makarov <vmakarov@redhat.com>  2006-06-14 20:26:03 +0000
committer  Vladimir Makarov <vmakarov@redhat.com>  2006-06-14 20:26:03 +0000
commit     bcead9750c364a70b5457e411556878a27a41df0 (patch)
tree       b35147fd9e315ef89619e443d836b4545586c150
parent     1e32de2ff975b091c9db0a3d0d39db33f7c968ea (diff)
2006-06-14 Vladimir Makarov <vmakarov@redhat.com>
        * config/rs6000/rs6000.h (YARA_COVER_CLASSES): New macro.

        * config/i386/i386.h (TARGET_WOODCREST,
        TARGET_CPU_DEFAULT_woodcrest): New macros.
        (TARGET_CPU_CPP_BUILTINS): Add __tune_woodcrest__, __woodcrest,
        and __woodcrest__.
        (TARGET_CPU_DEFAULT_generic): Change the value.
        (TARGET_CPU_DEFAULT_NAMES): Add woodcrest.
        (CONDITIONAL_REGISTER_USAGE): Make MMX registers fixed if there is
        no explicit -mmmx.
        (processor_type): Add PROCESSOR_WOODCREST.

        * config/i386/i386.c (pentium4_cost): Fix cost of moving SSE
        register.
        (woodcrest_cost): New structure.
        (m_WOODCREST): New macro.
        (x86_movx, x86_unroll_strlen, x86_cmove, x86_deep_branch,
        x86_use_sahf, x86_use_simode_fiop, x86_sub_esp_4, x86_sub_esp_8,
        x86_add_esp_8, x86_integer_DFmode_moves,
        x86_accumulate_outgoing_args, x86_prologue_using_move,
        x86_epilogue_using_move, x86_arch_always_fancy_math_387,
        x86_sse_partial_reg_dependency, x86_sse_load0_by_pxor,
        x86_rep_movl_optimal, x86_ext_80387_constants,
        x86_four_jump_limit): Add m_WOODCREST.
        (processor_target_table, processor_alias_table): Add entries for
        woodcrest.

        * Makefile.in (yara-final.o, yara-ir.o): Add dependency on
        output.h.

        * lower-subreg.c (rest_of_handle_lower_subreg): Call reg_scan.

        * common.opt (frematerialize): Switch it off by default.

        * function.c (get_call_invalidated_used_regs): Check that decl is
        a function.

        * reload1.c (reload): Set up cfun->emit->call_used_regs.

        * yara-int.h (reg_equiv_set_p): New external definition.

        * yara-insn.c (limit_insn_allocno_class): Mark parameter as
        possibly unused.

        * yara-color.c (temp_hard_reg_set): New global variable.
        (setup_reg_subclasses): Use the variable.  Don't consider fixed
        registers.
        (setup_closure_classes): Check moves between all register
        sub-classes.  Prefer bigger cover classes.
        (set_up_cover_classes): New function.
        (setup_class_translate): Don't consider fixed registers.
        (find_reg_class_closure): Add call of set_up_cover_classes.
        (setup_cover_classes_and_reg_costs): Check that can_memory_cost
        and can_class_cost are defined.
        (set_up_can_through, choose_can_to_split, reduce_reg_pressure):
        Use unsigned when it is necessary.

        * yara-final.c (unnecessary_copy_p): Ignore copy to restore an
        equivalent memory.
        (initiate_locations): Increase size of mem_locs.

        * yara-trans.c: Add header toplev.h.
        (allocno_change_mode_ok_p): New function.
        (assign_copy): Call the function.
        (allocate_allocno_memory_slot): Set up correct memory slot offset.
        (compact_stack): Use unsigned where it is necessary.
        (check_hard_regno_for_a): Check early clobber for identical regnos
        too.

        * yara-ir.c: Add header output.h.
        (reg_equiv_set_p): New external variable.
        (scan_insn_for_reg_equivs): Switch off code for putting constant
        into memory.  Set up reg_equiv_set_p.
        (initiate_equivs, finish_equivs): Allocate/free reg_equiv_set_p.
        (create_conflict): Check early clobber for identical regnos too.
        (create_insn_allocno): Update reg_equiv_set_p.
        (build_insn_allocno_conflicts): Remove code for killing hard
        register allocno.

git-svn-id: https://gcc.gnu.org/svn/gcc/branches/yara-branch@114654 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--  gcc/ChangeLog               81
-rw-r--r--  gcc/Makefile.in              4
-rw-r--r--  gcc/common.opt               2
-rw-r--r--  gcc/config/i386/i386.c      99
-rw-r--r--  gcc/config/i386/i386.h      16
-rw-r--r--  gcc/config/rs6000/rs6000.h  17
-rw-r--r--  gcc/function.c               2
-rw-r--r--  gcc/lower-subreg.c           4
-rw-r--r--  gcc/reload1.c               11
-rw-r--r--  gcc/yara-color.c           132
-rw-r--r--  gcc/yara-final.c            10
-rw-r--r--  gcc/yara-insn.c              3
-rw-r--r--  gcc/yara-int.h               5
-rw-r--r--  gcc/yara-ir.c               56
-rw-r--r--  gcc/yara-trans.c            56
15 files changed, 399 insertions, 99 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c16caee6a31..6e0fca88551 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,84 @@
+2006-06-14 Vladimir Makarov <vmakarov@redhat.com>
+
+ * config/rs6000/rs6000.h (YARA_COVER_CLASSES): New macro.
+
+ * config/i386/i386.h (TARGET_WOODCREST,
+ TARGET_CPU_DEFAULT_woodcrest): New macros.
+ (TARGET_CPU_CPP_BUILTINS): Add __tune_woodcrest__, __woodcrest,
+ and __woodcrest__.
+ (TARGET_CPU_DEFAULT_generic): Change the value.
+ (TARGET_CPU_DEFAULT_NAMES): Add woodcrest.
+ (CONDITIONAL_REGISTER_USAGE): Make MMX registers fixed if there is
+ no explicit -mmmx.
+ (processor_type): Add PROCESSOR_WOODCREST.
+
+ * config/i386/i386.c (pentium4_cost): Fix cost of moving SSE
+ register.
+ (woodcrest_cost): New structure.
+ (m_WOODCREST): New macro.
+ (x86_movx, x86_unroll_strlen, x86_cmove, x86_deep_branch,
+ x86_use_sahf, x86_use_simode_fiop, x86_sub_esp_4, x86_sub_esp_8,
+ x86_add_esp_8, x86_integer_DFmode_moves,
+ x86_accumulate_outgoing_args, x86_prologue_using_move,
+ x86_epilogue_using_move, x86_arch_always_fancy_math_387,
+ x86_sse_partial_reg_dependency, x86_sse_load0_by_pxor,
+ x86_rep_movl_optimal, x86_ext_80387_constants,
+ x86_four_jump_limit): Add m_WOODCREST.
+ (processor_target_table, processor_alias_table): Add entries for
+ woodcrest.
+
+ * Makefile.in (yara-final.o, yara-ir.o): Add dependency on
+ output.h.
+
+ * lower-subreg.c (rest_of_handle_lower_subreg): Call reg_scan.
+
+ * common.opt (frematerialize): Switch it off by default.
+
+ * function.c (get_call_invalidated_used_regs): Check that decl is
+ a function.
+
+ * reload1.c (reload): Set up cfun->emit->call_used_regs.
+
+ * yara-int.h (reg_equiv_set_p): New external definition.
+
+ * yara-insn.c (limit_insn_allocno_class): Mark parameter as
+ possibly unused.
+
+ * yara-color.c (temp_hard_reg_set): New global variable.
+ (setup_reg_subclasses): Use the variable. Don't consider fixed
+ registers.
+ (setup_closure_classes): Check moves between all register
+ sub-classes. Prefer bigger cover classes.
+ (set_up_cover_classes): New function.
+ (setup_class_translate): Don't consider fixed registers.
+ (find_reg_class_closure): Add call of set_up_cover_classes.
+ (setup_cover_classes_and_reg_costs): Check that can_memory_cost
+ and can_class_cost are defined.
+ (set_up_can_through, choose_can_to_split, reduce_reg_pressure):
+ Use unsigned when it is necessary.
+
+ * yara-final.c (unnecessary_copy_p): Ignore copy to restore an
+ equivalent memory.
+ (initiate_locations): Increase size of mem_locs.
+
+ * yara-trans.c: Add header toplev.h.
+ (allocno_change_mode_ok_p): New function.
+ (assign_copy): Call the function.
+ (allocate_allocno_memory_slot): Set up correct memory slot offset.
+ (compact_stack): Use unsigned where it is necessary.
+ (check_hard_regno_for_a): Check early clobber for identical regnos
+ too.
+
+ * yara-ir.c: Add header output.h.
+ (reg_equiv_set_p): New external variable.
+ (scan_insn_for_reg_equivs): Switch off code for putting constant
+ into memory. Set up reg_equiv_set_p.
+ (initiate_equivs, finish_equivs): Allocate/free reg_equiv_set_p.
+ (create_conflict): Check early clobber for identical regnos too.
+ (create_insn_allocno): Update reg_equiv_set_p.
+ (build_insn_allocno_conflicts): Remove code for killing hard
+ register allocno.
+
2006-06-01 Andrew Pinski <pinskia@physics.uc.edu>
* Makefile.in (lower-subreg.o): Add TM_H dependency.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 47d8423d42f..a4925e4287b 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2514,7 +2514,7 @@ yara-ir.o: yara-ir.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(OPTABS_H) $(RECOG_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) $(HASHTAB_H) \
errors.h $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) langhooks.h \
$(CFGLOOP_H) yara.h yara-int.h $(FUNCTION_H) $(CGRAPH_H) $(GGC_H) \
- gt-yara-ir.h
+ output.h gt-yara-ir.h
yara-trans.o: yara-trans.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TARGET_H) $(RTL_H) insn-codes.h insn-config.h $(OPTABS_H) $(RECOG_H) \
$(REGS_H) hard-reg-set.h $(FLAGS_H) $(HASHTAB_H) errors.h \
@@ -2533,7 +2533,7 @@ yara-insn.o: yara-insn.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
yara-final.o: yara-final.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TARGET_H) $(RTL_H) insn-codes.h $(OPTABS_H) $(RECOG_H) \
$(REGS_H) hard-reg-set.h $(FLAGS_H) errors.h \
- $(EXPR_H) $(BASIC_BLOCK_H) toplev.h $(TM_P_H) \
+ $(EXPR_H) $(BASIC_BLOCK_H) toplev.h output.h $(TM_P_H) \
$(CFGLOOP_H) yara.h yara-int.h
regmove.o : regmove.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
insn-config.h timevar.h tree-pass.h \
diff --git a/gcc/common.opt b/gcc/common.opt
index d8c8ad2d010..9b89d4733b2 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -722,7 +722,7 @@ Common Report Var(flag_relief)
Register pressure relief
frematerialize
-Common Report Var(flag_rematerialize) Init(2)
+Common Report Var(flag_rematerialize)
Perform a register rematerialization
frename-registers
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index bbf28efa6dd..d8aca59f54c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -518,7 +518,7 @@ struct processor_costs pentium4_cost = {
in SImode and DImode */
{2, 2}, /* cost of storing MMX registers
in SImode and DImode */
- 12, /* cost of moving SSE register */
+ 4, /* cost of moving SSE register */
{12, 12, 12}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{2, 2, 8}, /* cost of storing SSE registers
@@ -587,6 +587,58 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
};
+static const
+struct processor_costs woodcrest_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (10), /* HI */
+ COSTS_N_INSNS (10), /* SI */
+ COSTS_N_INSNS (10), /* DI */
+ COSTS_N_INSNS (10)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (66), /* HI */
+ COSTS_N_INSNS (66), /* SI */
+ COSTS_N_INSNS (66), /* DI */
+ COSTS_N_INSNS (66)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 3, /* cost for loading QImode using movzbl */
+ {3, 3, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 1, /* cost of reg,reg fld/fst */
+ {3, 3, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {2, 2, 4}, /* cost of loading integer registers */
+ 6, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 1, /* cost of moving SSE register */
+ {3, 3, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 1, /* MMX or SSE register to integer */
+ 32, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (40), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+};
+
/* Generic64 should produce code tuned for Nocona and K8. */
static const
struct processor_costs generic64_cost = {
@@ -712,12 +764,13 @@ const struct processor_costs *ix86_cost = &pentium_cost;
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
+#define m_WOODCREST (1<<PROCESSOR_WOODCREST)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Generic instruction choice should be common subset of supported CPUs
- (PPro/PENT4/NOCONA/Athlon/K8). */
+ (PPro/PENT4/NOCONA/WOODCREST/Athlon/K8). */
/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
Generic64 seems like good code size tradeoff. We can't enable it for 32bit
@@ -725,20 +778,20 @@ const struct processor_costs *ix86_cost = &pentium_cost;
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8 | m_GENERIC64;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
-const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
+const int x86_movx = m_ATHLON_K8 | m_PPRO | m_WOODCREST | m_PENT4 | m_NOCONA | m_GENERIC /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
-const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
-const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
+const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_WOODCREST | m_ATHLON_K8 | m_K6 | m_GENERIC;
+const int x86_cmove = m_PPRO | m_WOODCREST | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_fisttp = m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
-const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
+const int x86_deep_branch = m_PPRO | m_WOODCREST | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
/* Branch hints were put in P4 based on simulation result. But
after P4 was made, no performance benefit was observed with
branch hints. It also increases the code size. As the result,
icc never generates branch hints. */
const int x86_branch_hints = 0;
-const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
+const int x86_use_sahf = m_PPRO | m_WOODCREST | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
compilation setting as well. However in current implementation the
partial register stalls are not eliminated very well - they can
@@ -749,7 +802,7 @@ const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_G
to leave it off for generic32 for now. */
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
-const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
+const int x86_use_simode_fiop = ~(m_PPRO | m_WOODCREST | m_ATHLON_K8 | m_PENT | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_GENERIC);
const int x86_read_modify_write = ~m_PENT;
@@ -765,18 +818,18 @@ const int x86_promote_qi_regs = 0;
if our scheme for avoiding partial stalls was more effective. */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
-const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_GENERIC;
-const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
+const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_WOODCREST | m_PENT4 | m_NOCONA | m_GENERIC;
+const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_WOODCREST | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA | m_GENERIC;
-const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
-const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC);
+const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_WOODCREST | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA | m_GENERIC;
+const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_WOODCREST | m_GENERIC);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
-const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
-const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
-const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_GENERIC;
+const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_WOODCREST | m_GENERIC;
+const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_WOODCREST | m_GENERIC;
+const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_WOODCREST | m_GENERIC;
const int x86_shift1 = ~m_486;
-const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
+const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_WOODCREST | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
/* In the Generic model we have a conflict here between PPro/Pentium4 based chips
that treat 128bit SSE registers as single units and K8 based chips that
divide SSE registers into two 64bit halves.
@@ -786,26 +839,26 @@ const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PEN
this option on P4 brings over 20% SPECfp regression, while enabling it on
K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
of moves. */
-const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC;
+const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_WOODCREST | m_GENERIC;
/* Set for machines where the type and dependencies are resolved on SSE
register parts instead of whole registers, so we may maintain just
lower part of scalar values in proper format leaving the upper part
undefined. */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
-const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
+const int x86_sse_load0_by_pxor = m_PPRO | m_WOODCREST | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
-const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
+const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_WOODCREST | m_K6;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_GENERIC);
/* ??? Allowing interunit moves makes it all too easy for the compiler to put
integer data in xmm registers. Which results in pretty abysmal code. */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
-const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
+const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_WOODCREST | m_GENERIC32;
/* Some CPU cores are not able to predict more than 4 branch instructions in
the 16 byte window. */
-const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
+const int x86_four_jump_limit = m_PPRO | m_WOODCREST | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486. */
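[Editorial note: the m_* constants above are one-bit-per-processor masks, so each const int tuning table is the set of CPUs a heuristic applies to, and adding m_WOODCREST to a table is the one-token change seen in these hunks. A minimal self-contained sketch of the mechanism follows; the enumerators and table name are illustrative stand-ins, not the exact i386.h definitions.]

#include <stdio.h>

/* Illustrative mirror of enum processor_type: one enumerator per CPU.  */
enum processor { PROC_PPRO, PROC_PENT4, PROC_NOCONA, PROC_WOODCREST, PROC_MAX };

/* One-bit-per-processor masks, built the same way as m_PENT4, m_WOODCREST.  */
#define M_PPRO      (1 << PROC_PPRO)
#define M_PENT4     (1 << PROC_PENT4)
#define M_NOCONA    (1 << PROC_NOCONA)
#define M_WOODCREST (1 << PROC_WOODCREST)

/* A tuning table: the set of CPUs for which the heuristic is enabled.  */
static const int use_cmove = M_PPRO | M_PENT4 | M_NOCONA | M_WOODCREST;

int
main (void)
{
  enum processor tune = PROC_WOODCREST;   /* e.g. chosen by -mtune=  */

  /* The feature test is a single AND against the active tuning bit.  */
  printf ("cmove enabled: %s\n", (use_cmove & (1 << tune)) ? "yes" : "no");
  return 0;
}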
@@ -1448,6 +1501,7 @@ override_options (void)
{&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
{&k8_cost, 0, 0, 16, 7, 16, 7, 16},
{&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
+ {&woodcrest_cost, 0, 0, 16, 15, 16, 7, 16},
{&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
{&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
};
@@ -1494,6 +1548,9 @@ override_options (void)
| PTA_MMX | PTA_PREFETCH_SSE},
{"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
| PTA_MMX | PTA_PREFETCH_SSE},
+ {"woodcrest", PROCESSOR_WOODCREST, PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_64BIT | PTA_MMX
+ | PTA_PREFETCH_SSE},
{"k6", PROCESSOR_K6, PTA_MMX},
{"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
{"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 4d2895f9726..154f74e415d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -136,6 +136,7 @@ extern const struct processor_costs *ix86_cost;
#define TARGET_K8 (ix86_tune == PROCESSOR_K8)
#define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
#define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA)
+#define TARGET_WOODCREST (ix86_tune == PROCESSOR_WOODCREST)
#define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
@@ -374,6 +375,8 @@ extern int x86_prefetch_sse;
builtin_define ("__tune_pentium4__"); \
else if (TARGET_NOCONA) \
builtin_define ("__tune_nocona__"); \
+ else if (TARGET_WOODCREST) \
+ builtin_define ("__tune_woodcrest__"); \
\
if (TARGET_MMX) \
builtin_define ("__MMX__"); \
@@ -447,6 +450,11 @@ extern int x86_prefetch_sse;
builtin_define ("__nocona"); \
builtin_define ("__nocona__"); \
} \
+ else if (ix86_arch == PROCESSOR_WOODCREST) \
+ { \
+ builtin_define ("__woodcrest"); \
+ builtin_define ("__woodcrest__"); \
+ } \
} \
while (0)
@@ -467,14 +475,15 @@ extern int x86_prefetch_sse;
#define TARGET_CPU_DEFAULT_pentium_m 14
#define TARGET_CPU_DEFAULT_prescott 15
#define TARGET_CPU_DEFAULT_nocona 16
-#define TARGET_CPU_DEFAULT_generic 17
+#define TARGET_CPU_DEFAULT_woodcrest 17
+#define TARGET_CPU_DEFAULT_generic 18
#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
"pentiumpro", "pentium2", "pentium3", \
"pentium4", "k6", "k6-2", "k6-3",\
"athlon", "athlon-4", "k8", \
"pentium-m", "prescott", "nocona", \
- "generic"}
+ "woodcrest", "generic"}
#ifndef CC1_SPEC
#define CC1_SPEC "%(cc1_cpu) "
@@ -785,7 +794,7 @@ do { \
fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
} \
- if (! TARGET_MMX) \
+ if (! TARGET_MMX || !(target_flags_explicit & MASK_MMX)) \
{ \
int i; \
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
@@ -2054,6 +2063,7 @@ enum processor_type
PROCESSOR_PENTIUM4,
PROCESSOR_K8,
PROCESSOR_NOCONA,
+ PROCESSOR_WOODCREST,
PROCESSOR_GENERIC32,
PROCESSOR_GENERIC64,
PROCESSOR_max
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bfa7074247e..b4526da5076 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1029,6 +1029,23 @@ enum reg_class
{ 0xffffffff, 0xffffffff, 0xffffffff, 0x0003ffff } /* ALL_REGS */ \
}
+/* The following macro defines cover classes for Yet Another Register
+   Allocator.  Cover classes are a set of non-intersecting register
+   classes covering all hard registers used for register allocation
+   purposes.  Any move between two registers of a cover class should be
+   cheaper than a load or store of those registers.  Usually you don't
+   need to define the macro because YARA finds cover classes by itself,
+   but in some complicated cases it can fail.  In that case you should
+   define the macro.  The macro value is an array of register classes
+   with LIM_REG_CLASSES used as the end marker.  */
+
+#define YARA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, FLOAT_REGS, ALTIVEC_REGS, VRSAVE_REGS, VSCR_REGS, \
+ SPE_ACC_REGS, SPEFSCR_REGS, MQ_REGS, LINK_REGS, CTR_REGS, \
+ CR_REGS, XER_REGS, LIM_REG_CLASSES \
+}
+
/* The same information, inverted:
Return the class number of the smallest class containing
reg number REGNO. This could be a conditional expression
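[Editorial note: because YARA_COVER_CLASSES is a sentinel-terminated array with a non-intersection contract, the allocator can walk and validate it with two nested loops, which is essentially what set_up_cover_classes in the yara-color.c hunk below does. A self-contained sketch under assumed names; the enum and the per-class bit sets are illustrative, not the rs6000 definitions.]

#include <assert.h>
#include <stdio.h>

/* Illustrative register classes; LIM_REG_CLASSES is the end marker.  */
enum reg_class { GENERAL_REGS, FLOAT_REGS, ALTIVEC_REGS, LIM_REG_CLASSES };

/* Hypothetical hard-register contents of each class, one bit per reg.  */
static const unsigned class_contents [LIM_REG_CLASSES] = {
  0x000000ffu,   /* GENERAL_REGS: regs 0-7   */
  0x0000ff00u,   /* FLOAT_REGS:   regs 8-15  */
  0x00ff0000u,   /* ALTIVEC_REGS: regs 16-23 */
};

/* What a target-provided YARA_COVER_CLASSES would expand to.  */
static const enum reg_class cover [] =
  { GENERAL_REGS, FLOAT_REGS, ALTIVEC_REGS, LIM_REG_CLASSES };

int
main (void)
{
  int i, j;

  /* Walk to the sentinel, checking the macro's contract: no two cover
     classes may share a hard register.  */
  for (i = 0; cover [i] != LIM_REG_CLASSES; i++)
    for (j = 0; j < i; j++)
      assert ((class_contents [cover [i]] & class_contents [cover [j]]) == 0);
  printf ("%d disjoint cover classes\n", i);
  return 0;
}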
diff --git a/gcc/function.c b/gcc/function.c
index 052d565320c..71998f4ebe7 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -5633,6 +5633,8 @@ get_call_invalidated_used_regs (rtx insn, HARD_REG_SET *regs, bool clobbered_p)
x = XEXP (x, 0);
if (GET_CODE (x) == SYMBOL_REF)
decl = SYMBOL_REF_DECL (x);
+ if (decl != NULL && TREE_CODE (decl) != FUNCTION_DECL)
+ decl = NULL;
}
node = decl == NULL ? NULL : cgraph_node (decl);
if (! flag_ipra || node == NULL
diff --git a/gcc/lower-subreg.c b/gcc/lower-subreg.c
index 185f4674c9f..821ca82ab1e 100644
--- a/gcc/lower-subreg.c
+++ b/gcc/lower-subreg.c
@@ -649,7 +649,11 @@ gate_lower_subreg (void)
static unsigned int
rest_of_handle_lower_subreg (void)
{
+ int max_reg_num_before = max_reg_num ();
+
decompose_multiword_subregs ();
+ if (max_reg_num_before != max_reg_num ())
+ reg_scan (get_insns (), max_reg_num ());
return 0;
}
diff --git a/gcc/reload1.c b/gcc/reload1.c
index e61b1b6fcc2..28f62832507 100644
--- a/gcc/reload1.c
+++ b/gcc/reload1.c
@@ -1151,14 +1151,21 @@ reload (rtx first, int global)
are no longer useful or accurate. Strip and regenerate REG_INC notes
that may have been moved around. */
+ CLEAR_HARD_REG_SET (cfun->emit->call_used_regs);
for (insn = first; insn; insn = NEXT_INSN (insn))
if (INSN_P (insn))
{
rtx *pnote;
if (CALL_P (insn))
- replace_pseudos_in (& CALL_INSN_FUNCTION_USAGE (insn),
- VOIDmode, CALL_INSN_FUNCTION_USAGE (insn));
+ {
+ HARD_REG_SET used_function_regs;
+
+ get_call_invalidated_used_regs (insn, &used_function_regs, false);
+ IOR_HARD_REG_SET (cfun->emit->call_used_regs, used_function_regs);
+ replace_pseudos_in (& CALL_INSN_FUNCTION_USAGE (insn),
+ VOIDmode, CALL_INSN_FUNCTION_USAGE (insn));
+ }
if ((GET_CODE (PATTERN (insn)) == USE
/* We mark with QImode USEs introduced by reload itself. */
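[Editorial note: the reload1.c hunk records, per function, the union of hard registers that some call in the function may clobber: it clears the set once, then IORs in the result of get_call_invalidated_used_regs for every call insn. A reduced sketch of that accumulation; the bitset type and per-call data are stand-ins for HARD_REG_SET and the real query.]

#include <stdio.h>

/* Stand-in for HARD_REG_SET: one bit per hard register.  */
typedef unsigned long hard_reg_set;

/* Hypothetical per-call clobber sets, as get_call_invalidated_used_regs
   would report them (under -fipra these can be smaller than the full
   call-used set).  */
static const hard_reg_set call_clobbers [] = { 0x0fUL, 0x03UL, 0x11UL };

int
main (void)
{
  /* Plays the role of cfun->emit->call_used_regs.  */
  hard_reg_set fn_call_used = 0;   /* CLEAR_HARD_REG_SET  */
  unsigned int i;

  /* One IOR_HARD_REG_SET per call insn in the function.  */
  for (i = 0; i < sizeof call_clobbers / sizeof call_clobbers [0]; i++)
    fn_call_used |= call_clobbers [i];

  printf ("regs clobbered by some call: %#lx\n", fn_call_used);
  return 0;
}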
diff --git a/gcc/yara-color.c b/gcc/yara-color.c
index 3093bbfa69d..6e81c2c2857 100644
--- a/gcc/yara-color.c
+++ b/gcc/yara-color.c
@@ -65,14 +65,14 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
consideration). */
static enum reg_class alloc_reg_class_subclasses[N_REG_CLASSES][N_REG_CLASSES];
+static HARD_REG_SET temp_hard_reg_set;
+
/* The function initializes the tables of subclasses of each reg
class. */
-
static void
setup_reg_subclasses (void)
{
int i, j;
- HARD_REG_SET temp_set1, temp_set2;
for (i = 0; i < N_REG_CLASSES; i++)
for (j = 0; j < N_REG_CLASSES; j++)
@@ -83,14 +83,20 @@ setup_reg_subclasses (void)
if (i == (int) NO_REGS)
continue;
- COPY_HARD_REG_SET (temp_set1, reg_class_contents [i]);
+ COPY_HARD_REG_SET (temp_hard_reg_set, reg_class_contents [i]);
+ AND_COMPL_HARD_REG_SET (temp_hard_reg_set, fixed_reg_set);
+ GO_IF_HARD_REG_EQUAL (temp_hard_reg_set, zero_hard_reg_set, cont);
+ goto ok;
+ cont:
+ continue;
+ ok:
for (j = 0; j < N_REG_CLASSES; j++)
if (i != j)
{
enum reg_class *p;
-
- COPY_HARD_REG_SET (temp_set2, reg_class_contents [j]);
- GO_IF_HARD_REG_SUBSET (temp_set1, temp_set2, subclass);
+
+ GO_IF_HARD_REG_SUBSET (reg_class_contents [i],
+ reg_class_contents [j], subclass);
continue;
subclass:
p = &alloc_reg_class_subclasses [j] [0];
@@ -100,6 +106,11 @@ setup_reg_subclasses (void)
}
}
+static int final_reg_class_cover_size;
+static enum reg_class final_reg_class_cover [N_REG_CLASSES];
+
+#ifndef YARA_COVER_CLASSES
+
/* The following is true if moving any hard register of the class into
any hard register of the same class is cheaper than load or store
any hard register of the class and the class is not sub-class of
@@ -114,7 +125,7 @@ static void
setup_closure_classes (void)
{
enum machine_mode mode;
- enum reg_class cl, *sub_cl_ptr;
+ enum reg_class cl, *sub_cl_ptr, *sub_cl_ptr1;
int cost, min_cost;
for (cl = 0; cl < N_REG_CLASSES; cl++)
@@ -126,14 +137,24 @@ setup_closure_classes (void)
{
for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
if (mode != VOIDmode && mode != BLKmode
- && contains_reg_of_mode [*sub_cl_ptr] [mode]
- && contains_reg_of_mode [cl] [mode])
+ && contains_reg_of_mode [*sub_cl_ptr] [mode])
{
min_cost = memory_move_cost [mode] [*sub_cl_ptr] [0];
cost = memory_move_cost [mode] [*sub_cl_ptr] [1];
if (cost < min_cost)
min_cost = cost;
- if (min_cost < move_cost [mode] [cl] [cl])
+ for (sub_cl_ptr1 = &alloc_reg_class_subclasses [cl] [0];
+ *sub_cl_ptr1 != LIM_REG_CLASSES;
+ sub_cl_ptr1++)
+ if (*sub_cl_ptr != *sub_cl_ptr1
+ && contains_reg_of_mode [*sub_cl_ptr1] [mode]
+ && ((min_cost
+ < move_cost [mode] [*sub_cl_ptr] [*sub_cl_ptr1])
+ || (min_cost
+ < move_cost [mode] [*sub_cl_ptr1]
+ [*sub_cl_ptr])))
+ break;
+ if (*sub_cl_ptr1 != LIM_REG_CLASSES)
break;
}
if (mode < MAX_MACHINE_MODE)
@@ -149,7 +170,15 @@ setup_closure_classes (void)
for (sub_cl_ptr = &alloc_reg_class_subclasses [cl] [0];
*sub_cl_ptr != LIM_REG_CLASSES;
sub_cl_ptr++)
- class_closure_p [*sub_cl_ptr] = false;
+ {
+ /* We prefer the bigger class as a cover class when taking
+ fixed registers into account.  */
+ GO_IF_HARD_REG_SUBSET (reg_class_contents [cl],
+ reg_class_contents [*sub_cl_ptr], skip);
+ class_closure_p [*sub_cl_ptr] = false;
+ skip:
+ ;
+ }
}
closure_classes_size = 0;
for (cl = 0; cl < N_REG_CLASSES; cl++)
@@ -157,12 +186,9 @@ setup_closure_classes (void)
closure_classes [closure_classes_size++] = cl;
}
-
-static int reg_class_cover_size, final_reg_class_cover_size;
+static int reg_class_cover_size;
static enum reg_class reg_class_cover [N_REG_CLASSES];
-static enum reg_class final_reg_class_cover [N_REG_CLASSES];
static HARD_REG_SET reg_class_cover_set;
-static HARD_REG_SET temp_hard_reg_set;
static bool
extend_reg_class_cover (void)
@@ -212,6 +238,32 @@ extend_reg_class_cover (void)
return true;
}
+#else
+
+static void
+set_up_cover_classes (void)
+{
+ int i, j;
+ enum reg_class cl;
+ static enum reg_class classes [] = YARA_COVER_CLASSES;
+
+ final_reg_class_cover_size = 0;
+ for (i = 0; (cl = classes [i]) != LIM_REG_CLASSES; i++)
+ {
+ for (j = 0; j < i; j++)
+ if (reg_classes_intersect_p (cl, classes [j]))
+ gcc_unreachable ();
+ COPY_HARD_REG_SET (temp_hard_reg_set, reg_class_contents [cl]);
+ AND_COMPL_HARD_REG_SET (temp_hard_reg_set, fixed_reg_set);
+ GO_IF_HARD_REG_EQUAL (temp_hard_reg_set, zero_hard_reg_set, cont);
+ final_reg_class_cover [final_reg_class_cover_size++] = cl;
+ cont:
+ ;
+ }
+}
+
+#endif
+
static enum reg_class class_translate [N_REG_CLASSES];
static void
@@ -234,11 +286,10 @@ setup_class_translate (void)
#ifdef ENABLE_YARA_CHECKING
else
{
- HARD_REG_SET temp_set;
-
- COPY_HARD_REG_SET (temp_set, reg_class_contents [*cl_ptr]);
- AND_COMPL_HARD_REG_SET (temp_set, fixed_reg_set);
- GO_IF_HARD_REG_SUBSET (temp_set, zero_hard_reg_set, ok);
+ COPY_HARD_REG_SET (temp_hard_reg_set,
+ reg_class_contents [*cl_ptr]);
+ AND_COMPL_HARD_REG_SET (temp_hard_reg_set, fixed_reg_set);
+ GO_IF_HARD_REG_SUBSET (temp_hard_reg_set, zero_hard_reg_set, ok);
gcc_unreachable ();
ok:
;
@@ -273,15 +324,21 @@ debug_class_cover (void)
static void
find_reg_class_closure (void)
{
- bool ok_p;
-
setup_reg_subclasses ();
- setup_closure_classes ();
- final_reg_class_cover_size = N_REG_CLASSES;
- reg_class_cover_size = 0;
- CLEAR_HARD_REG_SET (reg_class_cover_set);
- ok_p = extend_reg_class_cover ();
- yara_assert (ok_p);
+#ifdef YARA_COVER_CLASSES
+ set_up_cover_classes ();
+#else
+ {
+ bool ok_p;
+
+ setup_closure_classes ();
+ final_reg_class_cover_size = N_REG_CLASSES;
+ reg_class_cover_size = 0;
+ CLEAR_HARD_REG_SET (reg_class_cover_set);
+ ok_p = extend_reg_class_cover ();
+ yara_assert (ok_p);
+ }
+#endif
setup_class_translate ();
}
@@ -1082,14 +1139,14 @@ setup_cover_classes_and_reg_costs (void)
{
if (min_alt_memory_cost [op_num] == INT_MAX)
can_memory_cost [can_num] = INT_MAX;
- else
+ else if (can_memory_cost [can_num] != INT_MAX)
can_memory_cost [can_num]
+= min_alt_memory_cost [op_num] * freq;
costs = &can_class_cost [can_num * N_REG_CLASSES];
for (i = 0; (cl = classes [i]) != NO_REGS; i++)
if (min_alt_class_cost [op_num] [cl] == INT_MAX)
costs [cl] = INT_MAX;
- else
+ else if (costs [cl] != INT_MAX)
costs [cl] += min_alt_class_cost [op_num] [cl] * freq;
}
else if (INSN_ALLOCNO_TYPE (a) == BASE_REG
@@ -1225,7 +1282,8 @@ set_up_can_through (struct yara_loop_tree_node *node)
{
bitmap_iterator bi;
bitmap can_through, regno_refs;
- int i, regno;
+ unsigned int i;
+ int regno;
allocno_t a;
can_t can;
@@ -1301,8 +1359,8 @@ die_allocno (allocno_t a)
static void
calculate_reg_pressure (void)
{
- int i, j, class_num;
- unsigned uid;
+ int i, class_num;
+ unsigned int uid, j;
rtx insn, bound;
allocno_t a;
copy_t before_copies, after_copies, cp;
@@ -1413,14 +1471,15 @@ choose_can_to_split (struct yara_loop_tree_node *subloop)
bitmap_iterator bi;
bitmap can_through;
int max_subloops_num, i, best_num;
+ unsigned int j;
can_t best, can;
can_through = subloop->can_through;
max_subloops_num = 0;
best = NULL;
- EXECUTE_IF_SET_IN_BITMAP (can_through, 0, i, bi)
+ EXECUTE_IF_SET_IN_BITMAP (can_through, 0, j, bi)
{
- can = cans [i];
+ can = cans [j];
if (CAN_COVER_CLASS (can) == curr_reg_pressure_class)
process_can_to_choose_split_can (can, &max_subloops_num, &best);
}
@@ -1690,7 +1749,8 @@ reduce_reg_pressure_inside_loop (struct yara_loop_tree_node *loop)
static void
reduce_reg_pressure (void)
{
- int class_num, i;
+ int class_num;
+ unsigned int i;
can_t can;
bitmap_iterator bi;
diff --git a/gcc/yara-final.c b/gcc/yara-final.c
index dae5d215ebe..3c05f683f3c 100644
--- a/gcc/yara-final.c
+++ b/gcc/yara-final.c
@@ -103,6 +103,10 @@ unnecessary_copy_p (copy_t cp)
}
else if (ALLOCNO_USE_EQUIV_CONST_P (dst))
return true;
+ else if (ALLOCNO_MEMORY_SLOT (dst) != NULL
+ && ALLOCNO_MEMORY_SLOT (dst)->mem != NULL_RTX
+ && reg_equiv_set_p [ALLOCNO_REGNO (dst)])
+ return true;
else if (ALLOCNO_MEMORY_SLOT (src) != NULL
&& ALLOCNO_MEMORY_SLOT (dst) != NULL)
{
@@ -618,9 +622,9 @@ initiate_locations (void)
"location conflicts");
memset (reg_locs, 0, sizeof (reg_locs));
mem_locs = yara_allocate (sizeof (struct loc *)
- * (slot_memory_size + equiv_memory_num));
+ * (slot_memory_size + equiv_memory_num + 16));
memset (mem_locs, 0,
- sizeof (struct loc *) * (slot_memory_size + equiv_memory_num));
+ sizeof (struct loc *) * (slot_memory_size + equiv_memory_num + 16));
max_mem_loc_len = -1;
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
reg_locs [i] = NULL;
@@ -2883,7 +2887,7 @@ modify_insn (rtx insn, bool non_operand_p)
else if (INSN_ALLOCNO_USE_WITHOUT_CHANGE_P (a))
;
else
- *loc = copy_rtx (*INSN_ALLOCNO_LOC (origin));
+ *loc = copy_rtx (*INSN_ALLOCNO_LOC (origin));
}
else if (ALLOCNO_USE_EQUIV_CONST_P (a))
{
diff --git a/gcc/yara-insn.c b/gcc/yara-insn.c
index ed4ab5edc93..1f4a8e6c3ab 100644
--- a/gcc/yara-insn.c
+++ b/gcc/yara-insn.c
@@ -39,6 +39,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "cfgloop.h"
#include "errors.h"
#include "params.h"
+#include "toplev.h"
#include "yara-int.h"
@@ -415,7 +416,7 @@ setup_possible_alternatives (bool strict_p)
static enum reg_class
-limit_insn_allocno_class (allocno_t a, enum reg_class class)
+limit_insn_allocno_class (allocno_t a ATTRIBUTE_UNUSED, enum reg_class class)
{
#ifdef LIMIT_RELOAD_CLASS
rtx x = *INSN_ALLOCNO_LOC (a);
diff --git a/gcc/yara-int.h b/gcc/yara-int.h
index ff297bdf7e1..e33d0fd3248 100644
--- a/gcc/yara-int.h
+++ b/gcc/yara-int.h
@@ -68,6 +68,11 @@ extern int *reg_max_ref_size, *reg_max_ref_align;
/* ??? */
extern int equiv_memory_num, *reg_equiv_memory_index;
+/* An element of the following array is true if the equivalence is set
+ by moving the value to the corresponding register and the value is not
+ changed. */
+extern bool *reg_equiv_set_p;
+
/* Element N is the list of insns that initialized reg N from its
equivalent constant or memory slot. */
extern rtx *reg_equiv_init;
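[Editorial note: to make the reg_equiv_set_p contract concrete: the flag for pseudo N stays true only while the move that established N's memory equivalence is its only definition. That is why create_insn_allocno in yara-ir.c below clears it on any non-matching store, and why unnecessary_copy_p in yara-final.c can then drop a store of N back to its equivalent memory. A small sketch of the protocol; the helper names are illustrative wrappers, not the YARA API.]

#include <stdbool.h>
#include <stdio.h>

#define MAX_REGNO 8

/* Mirrors reg_equiv_set_p: true while pseudo N's only definition is the
   move that established its memory equivalence.  */
static bool equiv_set_p [MAX_REGNO];

/* Equivalencing move seen: (set (reg N) (mem equiv)) with a REG_EQUIV note.  */
static void
note_equiv_move (int regno)
{
  equiv_set_p [regno] = true;
}

/* Any other definition of REGNO may change the value, so the flag is
   dropped (cf. create_insn_allocno in yara-ir.c).  */
static void
note_other_def (int regno)
{
  equiv_set_p [regno] = false;
}

int
main (void)
{
  note_equiv_move (1);   /* reg 1 loaded from its equivalent memory  */
  note_equiv_move (2);
  note_other_def (2);    /* reg 2 redefined: equivalent value is stale  */

  /* unnecessary_copy_p-style queries: is a store back to the
     equivalent memory slot redundant?  */
  printf ("store of reg 1 redundant: %d\n", equiv_set_p [1]);  /* 1 */
  printf ("store of reg 2 redundant: %d\n", equiv_set_p [2]);  /* 0 */
  return 0;
}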
diff --git a/gcc/yara-ir.c b/gcc/yara-ir.c
index b79f29f5501..58f5a1ba838 100644
--- a/gcc/yara-ir.c
+++ b/gcc/yara-ir.c
@@ -44,11 +44,12 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "hashtab.h"
#include "errors.h"
#include "ggc.h"
+#include "output.h"
#include "params.h"
#include "langhooks.h"
-#include "yara-int.h"
#include "cgraph.h"
#include "function.h"
+#include "yara-int.h"
struct loops yara_loops;
struct yara_loop_tree_node *yara_loop_tree_root;
@@ -56,6 +57,7 @@ struct yara_loop_tree_node *yara_bb_nodes;
struct yara_loop_tree_node *yara_loop_nodes;
int *reg_max_ref_size, *reg_max_ref_align;
int equiv_memory_num, *reg_equiv_memory_index;
+bool *reg_equiv_set_p;
#if 0
rtx *reg_equiv_init;
#endif
@@ -1167,7 +1169,7 @@ scan_insn_for_reg_equivs (rtx insn)
rtx set = single_set (insn);
bool eliminable_invariant_p;
eliminable_invariant_p = false;
- if (set != 0 && REG_P (SET_DEST (set)))
+ if (set != NULL_RTX && REG_P (SET_DEST (set)))
{
rtx note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
@@ -1226,6 +1228,7 @@ scan_insn_for_reg_equivs (rtx insn)
if (flag_rematerialize)
reg_equiv_constant [i] = x;
}
+#if 0
else
{
bool set_p = reg_equiv_memory_loc [i] == NULL;
@@ -1237,19 +1240,23 @@ scan_insn_for_reg_equivs (rtx insn)
if (set_p)
reg_equiv_memory_index [i] = equiv_memory_num++;
}
+#endif
}
else
return false;
-#if 0
/* If this register is being made equivalent to a MEM
and the MEM is not SET_SRC, the equivalencing insn is
one with the MEM as a SET_DEST and it occurs later.
So don't mark this insn now. */
if (! MEM_P (x) || rtx_equal_p (SET_SRC (set), x))
- reg_equiv_init [i]
- = gen_rtx_INSN_LIST (VOIDmode, insn, reg_equiv_init [i]);
+ {
+ reg_equiv_set_p [i] = true;
+#if 0
+ reg_equiv_init [i]
+ = gen_rtx_INSN_LIST (VOIDmode, insn, reg_equiv_init [i]);
#endif
+ }
}
}
}
@@ -1278,6 +1285,8 @@ initiate_equivs (void)
memset (reg_equiv_constant, 0, max_regno * sizeof (rtx));
reg_equiv_mem = yara_allocate (max_regno * sizeof (rtx));
memset (reg_equiv_mem, 0, max_regno * sizeof (rtx));
+ reg_equiv_set_p = yara_allocate (max_regno * sizeof (bool));
+ memset (reg_equiv_set_p, 0, max_regno * sizeof (bool));
#if 0
reg_equiv_init = yara_allocate (max_regno * sizeof (rtx));
memset (reg_equiv_init, 0, max_regno * sizeof (rtx));
@@ -1311,6 +1320,7 @@ finish_equivs (void)
yara_free (reg_max_ref_align);
yara_free (reg_max_ref_size);
yara_free (reg_equiv_address);
+ yara_free (reg_equiv_set_p);
#if 0
yara_free (reg_equiv_init);
#endif
@@ -1930,6 +1940,8 @@ create_conflict (allocno_t a1, allocno_t a2)
&& ALLOCNO_TYPE (a2) != INSN_ALLOCNO)
|| (ALLOCNO_TYPE (a1) == INSN_ALLOCNO
&& ALLOCNO_TYPE (a2) == INSN_ALLOCNO
+ && ! INSN_ALLOCNO_EARLY_CLOBBER (a1)
+ && ! INSN_ALLOCNO_EARLY_CLOBBER (a2)
&& rtx_equal_p (*INSN_ALLOCNO_LOC (a1), *INSN_ALLOCNO_LOC (a2))
&& (HARD_REGISTER_NUM_P (regno1)
|| ! mode_multi_reg_p [ALLOCNO_MODE (a1)]))
@@ -2203,7 +2215,7 @@ create_insn_allocno (enum op_type op_mode, int type, int regno,
allocno_t addr_output_allocno)
{
allocno_t a;
- rtx x;
+ rtx x, set;
yara_assert ((type != BASE_REG && type != INDEX_REG) || op_mode != OP_OUT);
a = create_allocno (INSN_ALLOCNO, regno, mode);
@@ -2214,6 +2226,21 @@ create_insn_allocno (enum op_type op_mode, int type, int regno,
if (GET_CODE (x) == SUBREG
&& GET_MODE_SIZE (GET_MODE (x)) > GET_MODE_SIZE (mode))
mode = GET_MODE (*container_loc);
+ if (op_mode != OP_IN && regno >= 0
+ && reg_equiv_memory_loc [regno] != NULL_RTX
+ && ! HARD_REGISTER_NUM_P (regno)
+ && reg_equiv_set_p [regno])
+ {
+ if ((set = single_set (insn)) == NULL_RTX)
+ reg_equiv_set_p [regno] = false;
+ else
+ {
+ yara_assert (REG_P (SET_DEST (set))
+ && REGNO (SET_DEST (set)) == regno);
+ if (! rtx_equal_p (SET_SRC (set), reg_equiv_memory_loc [regno]))
+ reg_equiv_set_p [regno] = false;
+ }
+ }
INSN_ALLOCNO_BIGGEST_MODE (a) = mode;
INSN_ALLOCNO_OP_MODE (a) = op_mode;
INSN_ALLOCNO_TYPE (a) = type;
@@ -3288,8 +3315,6 @@ build_insn_allocno_conflicts (rtx insn, op_set_t single_reg_op_set)
for (a = curr_insn_allocnos; a != NULL; a = INSN_ALLOCNO_NEXT (a))
if (INSN_ALLOCNO_OP_MODE (a) == OP_IN)
{
- int regno;
-
if (INSN_ALLOCNO_ADDR_OUTPUT_ALLOCNO (a) != NULL)
/* It is an address in an output operand so we can not kill
it now. */
@@ -3299,19 +3324,6 @@ build_insn_allocno_conflicts (rtx insn, op_set_t single_reg_op_set)
yara_assert (find_post_insn_allocno_copy (a, insn) == NULL);
mark_allocno_death (a);
}
- /* Although it may be a bit conservative to change live hard
- regs here. It should be better to do it in the
- corresponding copy before the insn but the hard register
- could be used twice in the insn and another copy might be
- between the two copies for the hard register. */
- if ((regno = ALLOCNO_REGNO (a)) >= 0 && HARD_REGISTER_NUM_P (regno))
- {
- /* We assume that there are no hard registers in
- subregisters. If it is not true we could use
- get_allocation_mode result as the mode. */
- yara_assert (GET_CODE (*INSN_ALLOCNO_CONTAINER_LOC (a)) != SUBREG);
- mark_hard_reg_death (regno, ALLOCNO_MODE (a));
- }
}
/* Death hard regs without allocnos: */
@@ -3360,7 +3372,7 @@ build_insn_allocno_conflicts (rtx insn, op_set_t single_reg_op_set)
(hard_regno >= FIRST_STACK_REG && hard_regno <= LAST_STACK_REG)
#endif
)
- mark_hard_reg_live (hard_regno, GET_MODE (output_insn_hard_regs [i]));
+ mark_hard_reg_live (hard_regno, GET_MODE (output_insn_hard_regs [i]));
}
/* Unused allocnos: */
diff --git a/gcc/yara-trans.c b/gcc/yara-trans.c
index f3c3a21dadd..f0c715c9b49 100644
--- a/gcc/yara-trans.c
+++ b/gcc/yara-trans.c
@@ -43,6 +43,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "errors.h"
#include "ggc.h"
#include "params.h"
+#include "toplev.h"
#include "yara-int.h"
/* Round a value to the lowest integer less than it that is a multiple of
@@ -119,6 +120,7 @@ static int find_hard_reg_for_mode (enum reg_class, enum machine_mode,
static bool allocate_copy_secondary_memory (bool, copy_t, int, enum reg_class,
enum reg_class, enum machine_mode);
#endif
+static bool allocno_change_mode_ok_p (allocno_t, allocno_t);
static bool assign_copy_interm_equiv_const_hard_reg (copy_t);
static void unassign_copy_interm_equiv_const_hard_reg (copy_t);
static bool assign_copy (copy_t);
@@ -866,7 +868,7 @@ get_copy_loc (copy_t cp, bool src_p, enum machine_mode *mode,
{
if (ALLOCNO_TYPE (a) == INSN_ALLOCNO
&& GET_MODE_SIZE (*mode) < GET_MODE_SIZE (amode)
- && (!src_p || COPY_SUBST_SRC_HARD_REGNO (cp) < 0))
+ && (! src_p || COPY_SUBST_SRC_HARD_REGNO (cp) < 0))
/* Paradoxical */
*hard_regno = (a_hard_regno
- (int) subreg_regno_offset (a_hard_regno, *mode,
@@ -993,7 +995,7 @@ allocate_allocno_memory_slot (allocno_t a)
SKIP_TO_SUBREG (x, *INSN_ALLOCNO_LOC (a));
if (GET_CODE (x) == SUBREG)
- ALLOCNO_MEMORY_SLOT_OFFSET (a) += SUBREG_BYTE (x);
+ ALLOCNO_MEMORY_SLOT_OFFSET (a) = SUBREG_BYTE (x);
}
if (slot->mem == NULL_RTX)
register_memory_slot_usage (slot, align);
@@ -1099,6 +1101,7 @@ void
compact_stack (void)
{
int i, j, n, slot_no;
+ unsigned int k;
can_t can;
int *vec;
int start, align;
@@ -1143,12 +1146,12 @@ compact_stack (void)
&& conflict_slot->mem == NULL_RTX)
reserve_stack_memory (conflict_slot->start, conflict_slot->size);
#ifdef SECONDARY_MEMORY_NEEDED
- EXECUTE_IF_SET_IN_BITMAP (secondary_memory_copies, 0, j, bi)
+ EXECUTE_IF_SET_IN_BITMAP (secondary_memory_copies, 0, k, bi)
{
- if (can_copy_conflict_p (can, copies [j]))
+ if (can_copy_conflict_p (can, copies [k]))
{
- yara_assert (COPY_CHANGE_ADDR (copies [j]) != NULL);
- conflict_slot = COPY_MEMORY_SLOT (copies [j]);
+ yara_assert (COPY_CHANGE_ADDR (copies [k]) != NULL);
+ conflict_slot = COPY_MEMORY_SLOT (copies [k]);
yara_assert (conflict_slot != NULL
&& conflict_slot->mem == NULL_RTX);
reserve_stack_memory (conflict_slot->start, conflict_slot->size);
@@ -1166,9 +1169,9 @@ compact_stack (void)
#ifdef SECONDARY_MEMORY_NEEDED
/* Try to move slots used for secondary memory closer to the stack
start. */
- EXECUTE_IF_SET_IN_BITMAP (secondary_memory_copies, 0, i, bi)
+ EXECUTE_IF_SET_IN_BITMAP (secondary_memory_copies, 0, k, bi)
{
- cp = copies [i];
+ cp = copies [k];
slot = COPY_MEMORY_SLOT (cp);
free_all_stack_memory ();
align = get_stack_align (COPY_MEMORY_MODE (cp)) / BITS_PER_UNIT;
@@ -1716,6 +1719,37 @@ unassign_copy_interm_equiv_const_hard_reg (copy_t cp)
yara_assert (ALLOCNO_USE_EQUIV_CONST_P (src));
}
+/* The following function returns TRUE if moving SRC to DST is not
+ prohibited from the standpoint of a mode change when a subregister is
+ involved. */
+static bool
+allocno_change_mode_ok_p (allocno_t src, allocno_t dst)
+{
+ allocno_t insn_a, another_a;
+ rtx op, x;
+ int hard_regno;
+
+ if (((insn_a = dst) != NULL && ALLOCNO_TYPE (dst) == INSN_ALLOCNO)
+ || ((insn_a = src) != NULL && ALLOCNO_TYPE (src) == INSN_ALLOCNO))
+ {
+ op = *INSN_ALLOCNO_LOC (insn_a);
+ if (GET_CODE (op) == SUBREG)
+ {
+ SKIP_TO_REG (x, op);
+ another_a = (src == insn_a ? dst : src);
+ if (REG_P (x)
+ && ((another_a != NULL
+ && (hard_regno = ALLOCNO_HARD_REGNO (another_a)) >= 0)
+ || (another_a == NULL
+ && (hard_regno = ALLOCNO_REGNO (insn_a)) >= 0))
+ && CANNOT_CHANGE_MODE_CLASS (GET_MODE (x), GET_MODE (op),
+ REGNO_REG_CLASS (hard_regno)))
+ return false;
+ }
+ }
+ return true;
+}
+
static bool
assign_copy (copy_t cp)
{
@@ -1729,6 +1763,8 @@ assign_copy (copy_t cp)
in_p = false;
a = COPY_SRC (cp);
a2 = COPY_DST (cp);
+ if (! allocno_change_mode_ok_p (a, a2))
+ return false;
if (a != NULL && ALLOCNO_USE_EQUIV_CONST_P (a)
&& ! assign_copy_interm_equiv_const_hard_reg (cp))
return false;
@@ -2175,6 +2211,10 @@ check_hard_regno_for_a (allocno_t a, int hard_regno,
INSN_ALLOCNO_INTERM_ELIMINATION_REGSET (conflict_a));
if (check_p
&& regno == ALLOCNO_REGNO (conflict_a)
+ && (ALLOCNO_TYPE (a) != INSN_ALLOCNO
+ || ALLOCNO_TYPE (conflict_a) != INSN_ALLOCNO
+ || (! INSN_ALLOCNO_EARLY_CLOBBER (a)
+ && ! INSN_ALLOCNO_EARLY_CLOBBER (conflict_a)))
&& (conflict_hard_regno = ALLOCNO_HARD_REGNO (conflict_a)) >= 0)
{
conflict_reg_hard_regno