diff options
author | Michael Meissner <meissner@linux.ibm.com> | 2018-08-28 21:50:53 +0000 |
---|---|---|
committer | Michael Meissner <meissner@linux.ibm.com> | 2018-08-28 21:50:53 +0000 |
commit | f04589e1f0fe5046b276026d4a942fc11ddc71ef (patch) | |
tree | 3cb42819fe5cac593c756cbf6cc5868997c6dca9 | |
parent | c4cdb32af39f61d10e3dac17d9c373716f9ae5f5 (diff) |
Make optimize addresses more dynamic; Delete before CSE pass
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/ibm/addr2@263933 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog.meissner | 34 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-addr.c | 126 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-cpus.def | 2 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-passes.def | 3 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 7 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.opt | 16 |
7 files changed, 94 insertions, 97 deletions
diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index ea240e07807..2fea0792440 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -1,3 +1,37 @@ +2018-08-28 Michael Meissner <meissner@linux.ibm.com> + + * config/rs6000/rs6000-addr.c (class toc_refs): Dynamically + allocate the toc save base pointers based on the + -moptimize-addresses=n switch. Delete the option to run optimize + addresses before CSE. Add an option to specify the number of GPR + loads to allow for power8 fusion without writes. + (toc_refs::add): Likewise. + (toc_refs::update): Likewise. + (toc_refs::process_toc_refs_single): Likewise. + (rs6000_optimize_addresses): Likewise. + (pass_data_optimize_addresses): Delete CSE pass, and delete _ira + in the pass names. + (make_pass_optimize_addresses): Likewise. + * config/rs6000/rs6000-cpus.def (POWERPC_MASKS): Delete optimize + address masks. + * config/rs6000/rs6000-passes.def: Delete optimize address before + CSE pass and rename the optimize address before IRA pass. + * config/rs6000/rs6000-protos.h (make_pass_optimize_addresses): + Likewise. + * config/rs6000/rs6000.c (rs6000_debug_reg_global): Print number + of saved TOC pointers in optimize address pass. + (rs6000_init_hard_regno_mode_ok): Change test for optimized + address pass. + (rs6000_opt_masks): Delete optimize address option masks. + * config/rs6000/rs6000.opt (-moptimize-addresses): Delete CSE + option. Add support for specifying the number of TOC registers. + Add option to specify the number of GPR registers to allow without + optimization to allow power8 fusion. + (-moptimize-addresses=<n>): Likewise. + (-moptimize-addresses-cse): Likewise. + (-moptimize-addresses-fusion): Likewise. + (-moptimize-addresses-fusion=<n>): Likewise. 
+ 2018-08-22 Michael Meissner <meissner@linux.ibm.com> * config/rs6000/rs6000-addr.c (class toc_refs): Add field to say diff --git a/gcc/config/rs6000/rs6000-addr.c b/gcc/config/rs6000/rs6000-addr.c index 55431915478..145b7a3e948 100644 --- a/gcc/config/rs6000/rs6000-addr.c +++ b/gcc/config/rs6000/rs6000-addr.c @@ -50,8 +50,6 @@ bool rs6000_optimized_address_p[NUM_MACHINE_MODES]; const unsigned INITIAL_NUM_REFS = 40; // # of refs to allocate initially. -const unsigned NUM_BASE_PTRS = 3; // # of base ptrs to save in a block -const unsigned NUM_P8_READS = 4; // cutover point to use base ptrs on P8 // Information for each base pointer struct base_ptr { @@ -63,34 +61,37 @@ struct base_ptr { unsigned num_reads; // number of reads to be modified. unsigned num_gpr_reads; // number of P8 fusion reads. unsigned num_writes; // number of writes to be modified. - unsigned max_refs; // refs array size. + size_t max_refs; // refs array size. bool different_offsets_p; // if different offsets are used. }; // Information needed for optimizing TOC references. class toc_refs { private: - struct base_ptr base[NUM_BASE_PTRS]; // all of the base pointers used + struct base_ptr *base; // all of the base pointers used + size_t max_base; // max # of base pointers unsigned total_reads; // total # of reads to be modified. unsigned total_gpr_reads; // total # of P8 fusion reads. unsigned total_writes; // total # of writes to be modified. 
- bool before_cse_p; // if this pass is being run before CSE public: - toc_refs (bool cse_p) + toc_refs (size_t num) { - memset ((void *) base, '\0', sizeof (base)); + max_base = num; + base = XNEWVEC (struct base_ptr, max_base); + memset ((void *) base, '\0', sizeof (struct base_ptr) * max_base); total_reads = 0; total_gpr_reads = 0; total_writes = 0; - before_cse_p = cse_p; } ~toc_refs () { - for (size_t i = 0; i < NUM_BASE_PTRS; i++) + for (size_t i = 0; i < max_base; i++) if (base[i].refs) free ((void *)base[i].refs); + + free ((void *)base); } // Reset variables for next basic block, don't reset totals or allocated @@ -111,7 +112,7 @@ class toc_refs { // Reset all of the blocks void reset_all (void) { - for (size_t i = 0; i < NUM_BASE_PTRS; i++) + for (size_t i = 0; i < max_base; i++) reset (i); } @@ -123,7 +124,7 @@ class toc_refs { unsigned get_num_refs (void) { unsigned ret = 0; - for (size_t i = 0; i < NUM_BASE_PTRS; i++) + for (size_t i = 0; i < max_base; i++) ret += base[i].num_refs; return ret; @@ -188,11 +189,11 @@ void toc_refs::add (rtx_insn *insn, rtx addr, HOST_WIDE_INT offset) { rtx set = single_set (insn); rtx dest = SET_DEST (set); - unsigned base_num = NUM_BASE_PTRS; + unsigned base_num = max_base; struct base_ptr *p; // See if the base register has already been used - for (size_t i = 0; i < NUM_BASE_PTRS; i++) + for (size_t i = 0; i < max_base; i++) { if (base[i].symbol && rtx_equal_p (base[i].symbol, addr)) { @@ -203,9 +204,9 @@ void toc_refs::add (rtx_insn *insn, rtx addr, HOST_WIDE_INT offset) // If the base register has not been previously used, see if there are any // free slots. 
- if (base_num == NUM_BASE_PTRS) + if (base_num == max_base) { - for (size_t i = 0; i < NUM_BASE_PTRS; i++) + for (size_t i = 0; i < max_base; i++) if (base[i].num_refs == 0) { base_num = i; @@ -215,11 +216,11 @@ void toc_refs::add (rtx_insn *insn, rtx addr, HOST_WIDE_INT offset) // We have to evict one of the base pointers, evict the one with the most // insns changed. - if (base_num == NUM_BASE_PTRS) + if (base_num == max_base) { unsigned mrefs = base[0].num_refs; base_num = 0; - for (size_t i = 1; i < NUM_BASE_PTRS; i++) + for (size_t i = 1; i < max_base; i++) if (base[i].num_refs > mrefs) { mrefs = base[i].num_refs; @@ -315,7 +316,7 @@ toc_refs::update (rtx old_mem, size_t base_num) gcc_assert (addr); - if (p->different_offsets_p || before_cse_p) + if (p->different_offsets_p) { new_addr = p->base_reg; if (offset != 0) @@ -353,10 +354,9 @@ toc_refs::process_toc_refs_single (size_t base_num) // If we are on a power8, and we just have GPR loads, fall back to not // optimizing the references, so that we use P8 fusion for each of the loads. - // However, if we have a lot of reads in the basic block do the optimization - // to save on i-cache space. 
- if (TARGET_P8_FUSION && !TARGET_P9_FUSION && p->num_writes == 0 - && p->num_reads == p->num_gpr_reads && p->num_reads < NUM_P8_READS) + if (TARGET_P8_FUSION && p->num_writes == 0 && optimize_addr_num > 0 + && p->num_reads == p->num_gpr_reads + && p->num_reads <= optimize_addr_fusion_num) { if (dump_file) fputs ("\nSkipping optimization, only GPR loads\n", dump_file); @@ -376,7 +376,7 @@ toc_refs::process_toc_refs_single (size_t base_num) // Set up the base register p->base_reg = gen_reg_rtx (Pmode); - if (p->different_offsets_p || before_cse_p) + if (p->different_offsets_p) set_base_reg = gen_rtx_SET (p->base_reg, p->symbol); else @@ -494,7 +494,7 @@ toc_refs::process_toc_refs_single (size_t base_num) void toc_refs::process_toc_refs (void) { - for (size_t i = 0; i < NUM_BASE_PTRS; i++) + for (size_t i = 0; i < max_base; i++) if (base[i].num_refs > 0) process_toc_refs_single (i); } @@ -505,6 +505,7 @@ void toc_refs::print_totals (void) { fputs ("\n", dump_file); + fprintf (dump_file, "Max TOC pointers = %u\n", (unsigned)max_base); fprintf (dump_file, "Total number of writes = %u\n", total_writes); fprintf (dump_file, "Total number of reads = %u (%u gprs)\n", total_reads, total_gpr_reads); @@ -514,9 +515,9 @@ toc_refs::print_totals (void) // Main entry point for this pass. 
unsigned int -rs6000_optimize_addresses (function *fun, bool before_cse_p) +rs6000_optimize_addresses (function *fun) { - toc_refs info (before_cse_p); + toc_refs info (optimize_addr_num); basic_block bb; rtx_insn *insn, *curr_insn = 0; @@ -586,56 +587,10 @@ rs6000_optimize_addresses (function *fun, bool before_cse_p) // Normal pass, run just before IRA (-moptimize-addresses) -const pass_data pass_data_optimize_addresses_ira = -{ - RTL_PASS, // type - "addr_ira", // name - OPTGROUP_NONE, // optinfo_flags - TV_NONE, // tv_id - 0, // properties_required - 0, // properties_provided - 0, // properties_destroyed - 0, // todo_flags_start - TODO_df_finish, // todo_flags_finish -}; - -class pass_optimize_addresses_ira : public rtl_opt_pass -{ -public: - pass_optimize_addresses_ira(gcc::context *ctxt) - : rtl_opt_pass(pass_data_optimize_addresses_ira, ctxt) - {} - - // opt_pass methods: - virtual bool gate (function *) - { - return (optimize > 0 && TARGET_OPT_ADDR); - } - - virtual unsigned int execute (function *fun) - { - return rs6000_optimize_addresses (fun, false); - } - - opt_pass *clone () - { - return new pass_optimize_addresses_ira (m_ctxt); - } - -}; // class pass_optimize_addresses_ira - -rtl_opt_pass * -make_pass_optimize_addresses_ira (gcc::context *ctxt) -{ - return new pass_optimize_addresses_ira (ctxt); -} - - -// Experimental pass, run just before CSE (-moptimize-addresses-cse) -const pass_data pass_data_optimize_addresses_cse = +const pass_data pass_data_optimize_addresses = { RTL_PASS, // type - "addr_cse", // name + "addr", // name OPTGROUP_NONE, // optinfo_flags TV_NONE, // tv_id 0, // properties_required @@ -645,33 +600,36 @@ const pass_data pass_data_optimize_addresses_cse = TODO_df_finish, // todo_flags_finish }; -class pass_optimize_addresses_cse : public rtl_opt_pass +class pass_optimize_addresses : public rtl_opt_pass { public: - pass_optimize_addresses_cse(gcc::context *ctxt) - : rtl_opt_pass(pass_data_optimize_addresses_cse, ctxt) - {} + 
pass_optimize_addresses(gcc::context *ctxt) + : rtl_opt_pass(pass_data_optimize_addresses, ctxt) + {} // opt_pass methods: virtual bool gate (function *) { - return (optimize > 0 && TARGET_OPT_ADDR_CSE); + return (optimize > 0 + && TARGET_POWERPC64 + && TARGET_ELF + && optimize_addr_num > 0); } virtual unsigned int execute (function *fun) { - return rs6000_optimize_addresses (fun, true); + return rs6000_optimize_addresses (fun); } opt_pass *clone () { - return new pass_optimize_addresses_cse (m_ctxt); + return new pass_optimize_addresses (m_ctxt); } -}; // class pass_optimize_addresses_cse +}; // class pass_optimize_addresses rtl_opt_pass * -make_pass_optimize_addresses_cse (gcc::context *ctxt) +make_pass_optimize_addresses (gcc::context *ctxt) { - return new pass_optimize_addresses_cse (ctxt); + return new pass_optimize_addresses (ctxt); } diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index e71af16f6d8..ccc0eb128ed 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -119,8 +119,6 @@ | OPTION_MASK_MODULO \ | OPTION_MASK_MULHW \ | OPTION_MASK_NO_UPDATE \ - | OPTION_MASK_OPT_ADDR \ - | OPTION_MASK_OPT_ADDR_CSE \ | OPTION_MASK_P8_FUSION \ | OPTION_MASK_P8_VECTOR \ | OPTION_MASK_P9_FUSION \ diff --git a/gcc/config/rs6000/rs6000-passes.def b/gcc/config/rs6000/rs6000-passes.def index 04f51f36fb5..1cbba000063 100644 --- a/gcc/config/rs6000/rs6000-passes.def +++ b/gcc/config/rs6000/rs6000-passes.def @@ -25,5 +25,4 @@ along with GCC; see the file COPYING3. 
If not see */ INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps); - INSERT_PASS_BEFORE (pass_cse, 1, pass_optimize_addresses_cse); - INSERT_PASS_BEFORE (pass_ira, 1, pass_optimize_addresses_ira); + INSERT_PASS_BEFORE (pass_ira, 1, pass_optimize_addresses); diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 838a58faa0e..269a6f54de3 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -245,8 +245,7 @@ namespace gcc { class context; } class rtl_opt_pass; extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *); -extern rtl_opt_pass *make_pass_optimize_addresses_ira (gcc::context *); -extern rtl_opt_pass *make_pass_optimize_addresses_cse (gcc::context *); +extern rtl_opt_pass *make_pass_optimize_addresses (gcc::context *); extern bool rs6000_sum_of_two_registers_p (const_rtx expr); extern bool rs6000_quadword_masked_address_p (const_rtx exp); extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 4755fb1d785..78d87db6b5a 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -2824,6 +2824,9 @@ rs6000_debug_reg_global (void) if (TARGET_DIRECT_MOVE_128) fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element", (int)VECTOR_ELEMENT_MFVSRLD_64BIT); + + if (optimize_addr_num) + fprintf (stderr, DEBUG_FMT_D, "Optimize addresses", optimize_addr_num); } @@ -3571,7 +3574,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_setup_reg_addr_masks (); /* Mark which modes support optimized addresses. 
*/ - if (TARGET_OPT_ADDR || TARGET_OPT_ADDR_CSE) + if (optimize_addr_num) { rs6000_optimized_address_p[QImode] = true; rs6000_optimized_address_p[HImode] = true; @@ -35716,8 +35719,6 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "modulo", OPTION_MASK_MODULO, false, true }, { "mulhw", OPTION_MASK_MULHW, false, true }, { "multiple", OPTION_MASK_MULTIPLE, false, true }, - { "optimize-addresses", OPTION_MASK_OPT_ADDR, false, true }, - { "optimize-addresses-cse", OPTION_MASK_OPT_ADDR_CSE, false, true }, { "popcntb", OPTION_MASK_POPCNTB, false, true }, { "popcntd", OPTION_MASK_POPCNTD, false, true }, { "power8-fusion", OPTION_MASK_P8_FUSION, false, true }, diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index e706c8101ed..c18d7a2bb78 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -577,9 +577,17 @@ mspeculate-indirect-jumps Target Undocumented Var(rs6000_speculate_indirect_jumps) Init(1) Save moptimize-addresses -Target Undocumented Mask(OPT_ADDR) Var(rs6000_isa_flags) +Target Undocumented Init(0) Var(optimize_addr_num, 4) Save Enable optimizing addresses for systems with fusion. -moptimize-addresses-cse -Target Undocumented Mask(OPT_ADDR_CSE) Var(rs6000_isa_flags) -Enable optimizing addresses for systems with fusion (run before CSE). +moptimize-addresses= +Target Undocumented RejectNegative Joined UInteger Var(optimize_addr_num) +Specify how many separate TOC pointers should be saved. + +moptimize-addresses-fusion +Target Undocumented Init(0) Var(optimize_addr_fusion_num, 4) Save +Number of GPR loads to allow for power8 fusion without saving TOCs. + +moptimize-addresses-fusion= +Target Undocumented RejectNegative Joined UInteger Var(optimize_addr_fusion_num) +Specify how many separate GPR loads to allow without saving TOCs. |