diff options
author | Kelvin Nilsen <kelvin@gcc.gnu.org> | 2017-10-27 18:04:37 +0000 |
---|---|---|
committer | Kelvin Nilsen <kelvin@gcc.gnu.org> | 2017-10-27 18:04:37 +0000 |
commit | e2939e1fa3fb9bd4e69794f1a6a427b8db461690 (patch) | |
tree | 6296795e4f4ab0b1d8731c4eb81a9d440481ed57 | |
parent | 60ed1f1ae85a2c300a084cad89f5c0e9ca164262 (diff) |
now working on problematic test cases
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/ibm/ltc99327@254165 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/config/rs6000/rs6000-p8swap.c | 107 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-protos.h | 3 | ||||
-rw-r--r-- | gcc/config/rs6000/vsx.md | 253 |
3 files changed, 209 insertions, 154 deletions
diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index f49a9c3fb2a..88ac0e5e650 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -352,6 +352,39 @@ rs6000_sum_of_two_registers_p (const_rtx expr) return false; } +/* Return true iff expr represents an address expression that masks off + * the low-order 4 bits in the style of an lvx or stvx rtl pattern. */ +bool +rs6000_quadword_masked_address_p (const_rtx expr) +{ + if (GET_CODE (expr) == AND) + { + const_rtx operand1 = XEXP (expr, 0); + const_rtx operand2 = XEXP (expr, 1); + if (dump_file) + { + fprintf (dump_file, "rs6000_quadword_masked_address_p operand 1:\n"); + print_inline_rtx (dump_file, operand1, 2); + fprintf (dump_file, "\nand operand2:\n"); + print_inline_rtx (dump_file, operand2, 2); + fprintf (dump_file, "\n"); + } + if (REG_P (operand2) || rs6000_sum_of_two_registers_p (operand2)) + { + if (CONST_SCALAR_INT_P (operand1) && INTVAL (operand1) == -16) + { + if (dump_file) + fprintf (dump_file, "returning true\n"); + return true; + } + } + } + if (dump_file) + fprintf (dump_file, + "returning false from rs6000_quadword_masked_address_p\n"); + return false; +} + /* Return TRUE if insn represents a swap of a swapped load from memory and the memory address is quad-word aligned. */ static bool @@ -1661,7 +1694,7 @@ replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) } static void -mimic_memory_attributes_and_flags (rtx new_mem_exp, rtx original_mem_exp) +mimic_memory_attributes_and_flags (rtx new_mem_exp, const_rtx original_mem_exp) { RTX_FLAG (new_mem_exp, jump) = RTX_FLAG (original_mem_exp, jump); RTX_FLAG (new_mem_exp, call) = RTX_FLAG (original_mem_exp, call); @@ -1704,6 +1737,15 @@ mimic_memory_attributes_and_flags (rtx new_mem_exp, rtx original_mem_exp) else clear_mem_size (new_mem_exp); + if (dump_file) { + fprintf (dump_file, + "in mimic_memory_attributes_and_flags, original memory exp\n"); + print_inline_rtx (dump_file, original_mem_exp, 2); + fprintf (dump_file, "\n"); + fprintf (dump_file, " new memory exp (may crash during output)\n"); + print_inline_rtx (dump_file, new_mem_exp, 2); + fprintf (dump_file, "\n"); + } } @@ -1727,11 +1769,12 @@ replace_swapped_aligned_store (swap_web_entry *insn_entry, rtx store_insn) /* Generate an rtx expression that corresponds */ +/* arguments should be mode, dest, src_exp */ +/* dest_exp is SET_DEST (body); */ rtx -rs6000_gen_lvx (enum machine_mode mode, const_rtx body) +rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) { - const_rtx src_exp = XEXP (SET_SRC (body), 0); - const_rtx memory_address = XEXP (src_exp, 0); + rtx memory_address = XEXP (src_exp, 0); rtx lvx; if (rs6000_sum_of_two_registers_p (memory_address)) @@ -1744,23 +1787,23 @@ rs6000_gen_lvx (enum machine_mode mode, const_rtx body) fprintf (dump_file, "Using the 2op form of lvx\n"); if (mode == V16QImode) - lvx = gen_altivec_lvx_v16qi_2op (SET_DEST (body), op1, op2); + lvx = gen_altivec_lvx_v16qi_2op (dest_exp, op1, op2); else if (mode == V8HImode) - lvx = gen_altivec_lvx_v8hi_2op (SET_DEST (body), op1, op2); + lvx = gen_altivec_lvx_v8hi_2op (dest_exp, op1, op2); #ifdef HAVE_V8HFmode else if (mode == V8HFmode) - lvx = gen_altivec_lvx_v8hf_2op (SET_DEST (body), op1, op2); + lvx = gen_altivec_lvx_v8hf_2op (dest_exp, op1, op2); #endif else if (mode == V4SImode) - lvx = gen_altivec_lvx_v4si_2op (SET_DEST (body), op1, op2); + lvx = gen_altivec_lvx_v4si_2op (dest_exp, op1, op2); else if (mode == V4SFmode) - lvx = gen_altivec_lvx_v4sf_2op (SET_DEST (body), op1, op2); + lvx = gen_altivec_lvx_v4sf_2op (dest_exp, op1, op2); else if (mode == V2DImode) - lvx = gen_altivec_lvx_v2di_2op (SET_DEST (body), op1, op2); + lvx = gen_altivec_lvx_v2di_2op (dest_exp, op1, op2); else if (mode == V2DFmode) - lvx = gen_altivec_lvx_v2df_2op (SET_DEST (body), op1, op2); + lvx = gen_altivec_lvx_v2df_2op (dest_exp, op1, op2); else if (mode == V1TImode) - lvx = gen_altivec_lvx_v1ti_2op (SET_DEST (body), op1, op2); + lvx = gen_altivec_lvx_v1ti_2op (dest_exp, op1, op2); else /* KFmode, TFmode, other modes not expected in this context. */ gcc_unreachable (); @@ -1768,32 +1811,35 @@ rs6000_gen_lvx (enum machine_mode mode, const_rtx body) else /* REG_P (memory_address) */ { if (mode == V16QImode) - lvx = gen_altivec_lvx_v16qi_1op (SET_DEST (body), memory_address); + lvx = gen_altivec_lvx_v16qi_1op (dest_exp, memory_address); else if (mode == V8HImode) - lvx = gen_altivec_lvx_v8hi_1op (SET_DEST (body), memory_address); + lvx = gen_altivec_lvx_v8hi_1op (dest_exp, memory_address); #ifdef HAVE_V8HFmode else if (mode == V8HFmode) - lvx = gen_altivec_lvx_v8hf_1op (SET_DEST (body), memory_address); + lvx = gen_altivec_lvx_v8hf_1op (dest_exp, memory_address); #endif else if (mode == V4SImode) - lvx = gen_altivec_lvx_v4si_1op (SET_DEST (body), memory_address); + lvx = gen_altivec_lvx_v4si_1op (dest_exp, memory_address); else if (mode == V4SFmode) - lvx = gen_altivec_lvx_v4sf_1op (SET_DEST (body), memory_address); + lvx = gen_altivec_lvx_v4sf_1op (dest_exp, memory_address); else if (mode == V2DImode) - lvx = gen_altivec_lvx_v2di_1op (SET_DEST (body), memory_address); + lvx = gen_altivec_lvx_v2di_1op (dest_exp, memory_address); else if (mode == V2DFmode) - lvx = gen_altivec_lvx_v2df_1op (SET_DEST (body), memory_address); + lvx = gen_altivec_lvx_v2df_1op (dest_exp, memory_address); else if (mode == V1TImode) - lvx = gen_altivec_lvx_v1ti_1op (SET_DEST (body), memory_address); + lvx = gen_altivec_lvx_v1ti_1op (dest_exp, memory_address); else /* KFmode, TFmode, other modes not expected in this context. */ gcc_unreachable (); } + /* kelvin may rewrite the following code to use change_address. As + such, we can eliminate the mimic_memory_attributes_and_flags + function. */ rtx new_mem_exp = SET_SRC (lvx); mimic_memory_attributes_and_flags (new_mem_exp, src_exp); - return rtx; + return lvx; } /* Given that swap_insn represents a swap of an aligned @@ -1848,17 +1894,19 @@ replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn) && (GET_CODE (SET_SRC (body)) == VEC_SELECT) && (GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)); - /* This is the memory expression that needs to be replaced with - * lvx expression. */ + + /* src_exp is not defined. */ + + /* const_rtx src_exp = XEXP (SET_SRC (body), 0); */ + + rtx src_exp = XEXP (SET_SRC (body), 0); + rtx memory_address = XEXP (src_exp, 0); enum machine_mode mode = GET_MODE (src_exp); rtx lvx; - /* Get rid of the VEC_SELECT operation */ - SET_SRC (body) = src_exp; - if (dump_file) { - fprintf (dump_file, "The body expression after eliminating VEC_SELECT\n"); + fprintf (dump_file, "The original body expression\n"); print_inline_rtx (dump_file, body, 2); fprintf (dump_file, "\n"); fprintf (dump_file, "The original src_exp before lvx replace is:\n"); @@ -1870,9 +1918,7 @@ replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn) fprintf (dump_file, "mode is %d\n", mode); } - - lvx = rs6000_gen_lvx (mode, body); - + lvx = rs6000_gen_lvx (mode, SET_DEST (body), src_exp); #ifdef LAMEO_SHAMEO_ATTEMPT_TO_USE_CHANGE_ADDRESS if (dump_file) { @@ -1922,7 +1968,6 @@ replace_swapped_aligned_load (swap_web_entry *insn_entry, rtx swap_insn) set_block_for_insn (new_insn, BLOCK_FOR_INSN (def_insn)); df_insn_rescan (new_insn); - if (dump_file) { unsigned int new_uid = INSN_UID (new_insn); diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 781349b850e..9f250e21e26 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -254,5 +254,8 @@ namespace gcc { class context; } class rtl_opt_pass; extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *); +extern bool rs6000_sum_of_two_registers_p (const_rtx expr); +extern bool rs6000_quadword_masked_address_p (const_rtx exp); +extern rtx rs6000_gen_lvx (enum machine_mode, rtx, rtx); #endif /* rs6000-protos.h */ diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 8065f636411..d38185729ea 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -430,44 +430,50 @@ (parallel [(const_int 1) (const_int 0)])))] " { - extern bool rs6000_sum_of_two_registers_p (const_rtx); - const_rtx mem = operands[1]; + rtx mem = operands[1]; - /* Note: This pattern works with VSX_D addresses, apparently for V2DI - vectors. The next pattern works with VSX_W patterns, apparently - for V4SI vectors. And below, I've got patterns for V8HI vectors - and V16QI vectors. */ if (dump_file) - fprintf (dump_file, \"*vsx_le_perm_load_<mode>, alignment %d\n\", - MEM_ALIGN (mem)); + fprintf (dump_file, + \"*vsx_le_perm_load_<mode> for doubles, alignment %d\n\", + MEM_ALIGN (mem)); if (MEM_ALIGN (mem) >= 128) { - const_rtx base_reg = XEXP (mem, 0); - - fprintf (dump_file, \"split *vsx_le_perm_load_<mode>, looking at mem exp\n\"); - print_inline_rtx (dump_file, mem, 2); - fprintf (dump_file, \"\n\"); - - fprintf (dump_file, \"base address reg:\n\"); - print_inline_rtx (dump_file, base_reg, 2); - fprintf (dump_file, \"\n\"); + rtx mem_address = XEXP (mem, 0); - if (REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg)) + if (dump_file) { - fprintf (dump_file, \"base_reg is REG_P or sum of two registers\n\"); - /* If this is already in the form that can be translated to lvx, - transform it by masking off least significant 4 bits. */ + fprintf (dump_file, + \"split *vsx_le_perm_load_<mode>, looking at mem exp\n\"); + print_inline_rtx (dump_file, mem, 2); + fprintf (dump_file, \"\n\"); - /* - rtx masked_address = AND (mem - change_address - */ + fprintf (dump_file, \"memory address:\n\"); + print_inline_rtx (dump_file, mem_address, 2); + fprintf (dump_file, \"\n\"); + } + enum machine_mode mode = GET_MODE (mem); + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + if (dump_file) + fprintf (dump_file, + \"mem_address is REG_P or sum of two registers\n\"); + /* Replace the source memory address with masked address. */ + rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); + emit_insn (lvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) + { + if (dump_file) + fprintf (dump_file, \"base_reg is quad-word-masked address\n\"); + /* This rtl is already in the form that matches lvx + instruction, so leave it alone. */ DONE; } - /* Otherwise, transform into a swapping instruction. */ + /* Otherwise, fall through to transform into a swapping load. */ } operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) : operands[0]; @@ -494,79 +500,49 @@ (const_int 0) (const_int 1)])))] " { - extern bool rs6000_sum_of_two_registers_p (const_rtx); - const_rtx mem = operands[1]; - - /* Note: This pattern works with VSX_D addresses, apparently for V2DI - vectors. The next pattern works with VSX_W patterns, apparently - for V4SI vectors. And below, I've got patterns for V8HI vectors - and V16QI vectors. */ + rtx mem = operands[1]; if (dump_file) - fprintf (dump_file, \"*vsx_le_perm_load_<mode>, alignment %d\n\", + fprintf (dump_file, \"*vsx_le_perm_load_<mode> for words, alignment %d\n\", MEM_ALIGN (mem)); if (MEM_ALIGN (mem) >= 128) { - const_rtx mem_address = XEXP (mem, 0); - - if (dump_file) { - fprintf (dump_file, - \"split *vsx_le_perm_load_<mode>, looking at mem exp\n\"); - print_inline_rtx (dump_file, mem, 2); - fprintf (dump_file, \"\n\"); + rtx mem_address = XEXP (mem, 0); - fprintf (dump_file, \"base address reg:\n\"); - print_inline_rtx (dump_file, mem_address, 2); - fprintf (dump_file, \"\n\"); - } + if (dump_file) + { + fprintf (dump_file, + \"split *vsx_le_perm_load_<mode>, looking at mem exp\n\"); + print_inline_rtx (dump_file, mem, 2); + fprintf (dump_file, \"\n\"); + + fprintf (dump_file, \"memory address:\n\"); + print_inline_rtx (dump_file, mem_address, 2); + fprintf (dump_file, \"\n\"); + } enum machine_mode mode = GET_MODE (mem); if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) { - fprintf (dump_file, - \"mem_address is REG_P or sum of two registers\n\"); - - rtx lvx = rs6000_gen_lvx (mode, mem_address); - - if (rs6000_sum_of_two_registers_p (mem_address)) - { - - } - -we've got something like this in "mem". Note that the address subexpression -might even have the and:DI masking already in it, in which case we can leave -it as is: - -(mem/c:V16QI (plus:DI (reg/f:DI 111 sfp) - (reg:DI 125)) [0 v+0 S16 A128]) - -and i want to replace the memory subexpression with: - -(insn 22 11 13 2 (set (reg:V16QI 129 [ <retval> ]) - (mem/u/c:V16QI (and:DI (reg/f:DI 128) - (const_int -16 [0xfffffffffffffff0])) [0 S16 A128])) "swaps - -borrow from the code that I have over in rs6000-p8swap.c, - replace_swapped_aligned_load () - - operands[1] is the (mem expression) - so i think if i just replace operand[1] with my new expression, i'm good. - - - - /* If this is already in the form that translates to lvx, leave - it alone. */ + if (dump_file) + fprintf (dump_file, + \"mem_address is REG_P or sum of two registers\n\"); + /* Replace the source memory address with masked address. */ + rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); + emit_insn (lvx_set_expr); DONE; } - else if (rs6000_quadword_masked_address_p (base_reg)) + else if (rs6000_quadword_masked_address_p (mem_address)) { - fprintf (dump_file, \"base_reg is quad-word-masked address\n\"); - /* This rtl is already in the form that matches lvx instruction. */ + if (dump_file) + fprintf (dump_file, \"base_reg is quad-word-masked address\n\"); + /* This rtl is already in the form that matches lvx + instruction, so leave it alone. */ DONE; } - /* Otherwise, transform into a swapping instruction. */ + /* Otherwise, fall through to transform into a swapping load. */ } operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) : operands[0]; @@ -597,38 +573,49 @@ borrow from the code that I have over in rs6000-p8swap.c, (const_int 2) (const_int 3)])))] " { - extern bool rs6000_sum_of_two_registers_p (const_rtx); - const_rtx mem = operands[1]; - - /* Note: This pattern works with VSX_D addresses, apparently for V2DI - vectors. The next pattern works with VSX_W patterns, apparently - for V4SI vectors. And below, I've got patterns for V8HI vectors - and V16QI vectors. */ + rtx mem = operands[1]; if (dump_file) - fprintf (dump_file, \"*vsx_le_perm_load_<mode>, alignment %d\n\", + fprintf (dump_file, \"*vsx_le_perm_load_<mode> for halfs, alignment %d\n\", MEM_ALIGN (mem)); if (MEM_ALIGN (mem) >= 128) { - const_rtx base_reg = XEXP (mem, 0); + rtx mem_address = XEXP (mem, 0); - fprintf (dump_file, \"split *vsx_le_perm_load_<mode>, looking at mem exp\n\"); - print_inline_rtx (dump_file, mem, 2); - fprintf (dump_file, \"\n\"); + if (dump_file) + { + fprintf (dump_file, + \"split *vsx_le_perm_load_<mode>, looking at mem exp\n\"); + print_inline_rtx (dump_file, mem, 2); + fprintf (dump_file, \"\n\"); + + fprintf (dump_file, \"memory address:\n\"); + print_inline_rtx (dump_file, mem_address, 2); + fprintf (dump_file, \"\n\"); + } - fprintf (dump_file, \"base address reg:\n\"); - print_inline_rtx (dump_file, base_reg, 2); - fprintf (dump_file, \"\n\"); + enum machine_mode mode = GET_MODE (mem); - if (REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg)) + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + if (dump_file) + fprintf (dump_file, + \"mem_address is REG_P or sum of two registers\n\"); + /* Replace the source memory address with masked address. */ + rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); + emit_insn (lvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) { - fprintf (dump_file, \"base_reg is REG_P or sum of two registers\n\"); - /* If this is already in the form that translates to lvx, leave - it alone. */ + if (dump_file) + fprintf (dump_file, \"base_reg is quad-word-masked address\n\"); + /* This rtl is already in the form that matches lvx + instruction, so leave it alone. */ DONE; } - /* Otherwise, transform into a swapping instruction. */ + /* Otherwise, fall through to transform into a swapping load. */ } operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) : operands[0]; @@ -667,38 +654,58 @@ borrow from the code that I have over in rs6000-p8swap.c, (const_int 6) (const_int 7)])))] " { - extern bool rs6000_sum_of_two_registers_p (const_rtx); - const_rtx mem = operands[1]; - - /* Note: This pattern works with VSX_D addresses, apparently for V2DI - vectors. The next pattern works with VSX_W patterns, apparently - for V4SI vectors. And below, I've got patterns for V8HI vectors - and V16QI vectors. */ + rtx mem = operands[1]; if (dump_file) - fprintf (dump_file, \"*vsx_le_perm_load_<mode>, alignment %d\n\", + fprintf (dump_file, \"*vsx_le_perm_load_<mode> for bytes, alignment %d\n\", MEM_ALIGN (mem)); - if (MEM_ALIGN (operands[1]) >= 128) + if (MEM_ALIGN (mem) >= 128) { - const_rtx base_reg = XEXP (mem, 0); + rtx mem_address = XEXP (mem, 0); - fprintf (dump_file, \"split *vsx_le_perm_load_<mode>, looking at mem exp\n\"); - print_inline_rtx (dump_file, mem, 2); - fprintf (dump_file, \"\n\"); + if (dump_file) + { + fprintf (dump_file, + \"split *vsx_le_perm_load_<mode>, looking at mem exp\n\"); + print_inline_rtx (dump_file, mem, 2); + fprintf (dump_file, \"\n\"); + + fprintf (dump_file, \"memory address:\n\"); + print_inline_rtx (dump_file, mem_address, 2); + fprintf (dump_file, \"\n\"); + } - fprintf (dump_file, \"base address reg:\n\"); - print_inline_rtx (dump_file, base_reg, 2); - fprintf (dump_file, \"\n\"); + enum machine_mode mode = GET_MODE (mem); - if (REG_P (base_reg) || rs6000_sum_of_two_registers_p (base_reg)) + if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) + { + if (dump_file) + fprintf (dump_file, + \"mem_address is REG_P or sum of two registers\n\"); + /* Replace the source memory address with masked address. */ + rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); + + if (dump_file) { + fprintf (dump_file, \"lvx_set_expr is\n\"); + print_inline_rtx (dump_file, lvx_set_expr, 2); + fprintf (dump_file, \"\nthe source of set is:\n\"); + print_inline_rtx (dump_file, SET_SRC (lvx_set_expr), 2); + fprintf (dump_file, \"\n\"); + } + + emit_insn (lvx_set_expr); + DONE; + } + else if (rs6000_quadword_masked_address_p (mem_address)) { - fprintf (dump_file, \"base_reg is REG_P or sum of two registers\n\"); - /* If this is already in the form that translates to lvx, leave - it alone. */ + if (dump_file) + fprintf (dump_file, \"base_reg is quad-word-masked address\n\"); + /* This rtl is already in the form that matches lvx + instruction, so leave it alone. */ DONE; } - /* Otherwise, transform into a swapping instruction. */ + /* Otherwise, fall through to transform into a swapping load. */ } operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) : operands[0]; |