diff options
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- | gcc/config/i386/i386.md | 821 |
1 files changed, 699 insertions, 122 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4596cee7f9e..4ee6b71d382 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2133,12 +2133,12 @@ case 4: return "mov{l}\t{%1, %0|%0, %1}"; case 5: - if (TARGET_SSE2 && !TARGET_ATHLON) + if (get_attr_mode (insn) == MODE_TI) return "pxor\t%0, %0"; else return "xorps\t%0, %0"; case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) + if (get_attr_mode (insn) == MODE_V4SF) return "movaps\t{%1, %0|%0, %1}"; else return "movss\t{%1, %0|%0, %1}"; @@ -2158,7 +2158,40 @@ } } [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") - (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_size") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. */ + (eq_attr "alternative" "6") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0))) + (const_string "V4SF") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) (define_insn "*swapsf" [(set (match_operand:SF 0 "register_operand" "+f") @@ -2319,25 +2352,78 @@ case 4: return "#"; case 5: - if (TARGET_ATHLON) - return "xorpd\t%0, %0"; - else - return "pxor\t%0, %0"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + abort (); + } case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movapd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + default: + abort (); + } + case 7: + if (get_attr_mode (insn) == MODE_V2DF) + return "movlpd\t{%1, %0|%0, %1}"; else return "movsd\t{%1, %0|%0, %1}"; - case 7: case 8: - return "movsd\t{%1, %0|%0, %1}"; + return "movsd\t{%1, %0|%0, %1}"; default: abort(); } } [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4") + (const_string "SI") + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI")] + (const_string "V2DF")) + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF")] + (const_string "DF")) + /* For achitectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0)) + (const_string "V2DF") + (const_string "DF"))] + (const_string "DF")))]) (define_insn "*movdf_integer" [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m") @@ -2381,16 +2467,34 @@ return "#"; case 5: - if (TARGET_ATHLON) - return "xorpd\t%0, %0"; - else - return "pxor\t%0, %0"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + abort (); + } case 6: - if (TARGET_PARTIAL_REG_DEPENDENCY) - return "movapd\t{%1, %0|%0, %1}"; + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + default: + abort (); + } + case 7: + if (get_attr_mode (insn) == MODE_V2DF) + return "movlpd\t{%1, %0|%0, %1}"; else return "movsd\t{%1, %0|%0, %1}"; - case 7: case 8: return "movsd\t{%1, %0|%0, %1}"; @@ -2399,7 +2503,42 @@ } } [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov") - (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4") + (const_string "SI") + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI")] + (const_string "V2DF")) + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF")] + (const_string "DF")) + /* For achitectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") + (const_int 0)) + (const_string "V2DF") + (const_string "DF"))] + (const_string "DF")))]) (define_split [(set (match_operand:DF 0 "nonimmediate_operand" "") @@ -3706,7 +3845,7 @@ (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY"))) (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))] - "TARGET_80387 && TARGET_SSE2" + "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS" { switch (which_alternative) { @@ -3716,7 +3855,30 @@ else return "fst%z0\t%y0"; case 4: - return "cvtsd2ss\t{%1, %0|%0, %1}"; + return "#"; + default: + abort (); + } +} + [(set_attr "type" "fmov,multi,multi,multi,ssecvt") + (set_attr "mode" "SF,SF,SF,SF,DF")]) + +(define_insn "*truncdfsf2_1_sse_nooverlap" + [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY"))) + (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))] + "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS" +{ + switch (which_alternative) + { + case 0: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + case 4: + return "#"; default: abort (); } @@ -3728,7 +3890,7 @@ [(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" "mY,f")))] - "TARGET_80387 && TARGET_SSE2 + "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (which_alternative) @@ -3747,7 +3909,30 @@ [(set_attr "type" "ssecvt,fmov") (set_attr "mode" "DF,SF")]) -(define_insn "truncdfsf2_3" +(define_insn "*truncdfsf2_2_nooverlap" + [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "mY,f")))] + "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" +{ + switch (which_alternative) + { + case 0: + return "#"; + case 1: + if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + return "fstp%z0\t%y0"; + else + return "fst%z0\t%y0"; + default: + abort (); + } +} + [(set_attr "type" "ssecvt,fmov") + (set_attr "mode" "DF,SF")]) + +(define_insn "*truncdfsf2_3" [(set (match_operand:SF 0 "memory_operand" "=m") (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] @@ -3765,11 +3950,20 @@ [(set (match_operand:SF 0 "register_operand" "=Y") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" "mY")))] - "!TARGET_80387 && TARGET_SSE2" + "!TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS" "cvtsd2ss\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) +(define_insn "*truncdfsf2_sse_only_nooverlap" + [(set (match_operand:SF 0 "register_operand" "=&Y") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "mY")))] + "!TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS" + "#" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + (define_split [(set (match_operand:SF 0 "memory_operand" "") (float_truncate:SF @@ -3779,15 +3973,56 @@ [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] "") +; Avoid possible reformating penalty on the destination by first +; zeroing it out (define_split - [(set (match_operand:SF 0 "nonimmediate_operand" "") + [(set (match_operand:SF 0 "register_operand" "") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" ""))) (clobber (match_operand 2 "" ""))] + "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS + && SSE_REG_P (operands[0]) + && !STACK_REG_P (operands[1])" + [(const_int 0)] +{ + rtx src, dest; + if (!TARGET_SSE_PARTIAL_REGS) + emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1])); + else + { + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + /* simplify_gen_subreg refuses to widen memory references. */ + if (GET_CODE (src) == SUBREG) + alter_subreg (&src); + if (reg_overlap_mentioned_p (operands[0], operands[1])) + abort (); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsd2ss (dest, dest, src)); + } + DONE; +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float_truncate:SF + (match_operand:DF 1 "nonimmediate_operand" "")))] "TARGET_80387 && reload_completed - && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])" - [(set (match_dup 0) (float_truncate:SF (match_dup 1)))] - "") + && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS" + [(const_int 0)] +{ + rtx src, dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); + /* simplify_gen_subreg refuses to widen memory references. */ + if (GET_CODE (src) == SUBREG) + alter_subreg (&src); + if (reg_overlap_mentioned_p (operands[0], operands[1])) + abort (); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsd2ss (dest, dest, src)); + DONE; +}) (define_split [(set (match_operand:SF 0 "register_operand" "") @@ -4491,6 +4726,22 @@ (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) +; Avoid possible reformating penalty on the destination by first +; zeroing it out +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))] + "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS + && SSE_REG_P (operands[0])" + [(const_int 0)] +{ + rtx dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsi2ss (dest, dest, operands[1])); + DONE; +}) + (define_expand "floatdisf2" [(set (match_operand:SF 0 "register_operand" "") (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] @@ -4529,6 +4780,22 @@ (set_attr "mode" "SF") (set_attr "fp_int_src" "true")]) +; Avoid possible reformating penalty on the destination by first +; zeroing it out +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))] + "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS + && SSE_REG_P (operands[0])" + [(const_int 0)] +{ + rtx dest; + dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0); + emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode))); + emit_insn (gen_cvtsi2ssq (dest, dest, operands[1])); + DONE; +}) + (define_insn "floathidf2" [(set (match_operand:DF 0 "register_operand" "=f,f") (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] @@ -9266,12 +9533,15 @@ in register. */ rtx reg = gen_reg_rtx (SFmode); rtx dest = operands[0]; + rtx imm = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode)); operands[1] = force_reg (SFmode, operands[1]); operands[0] = force_reg (SFmode, operands[0]); - emit_move_insn (reg, - gen_lowpart (SFmode, - gen_int_mode (0x80000000, SImode))); + reg = force_reg (V4SFmode, + gen_rtx_CONST_VECTOR (V4SFmode, + gen_rtvec (4, imm, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), + CONST0_RTX (SFmode)))); emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9289,8 +9559,8 @@ (define_insn "negsf2_ifs" [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") - (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0"))) - (use (match_operand:SF 2 "nonmemory_operand" "x,0#x,*g#x,*g#x")) + (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm#fr,0,0"))) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,*x*rm,*x*rm")) (clobber (reg:CC 17))] "TARGET_SSE && (reload_in_progress || reload_completed @@ -9311,7 +9581,7 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (neg:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9320,8 +9590,8 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") - (neg:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "register_operand" "")) + (neg:SF (match_operand:SF 1 "nonimmediate_operand" ""))) + (use (match_operand:V4SF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) @@ -9400,7 +9670,7 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. */ - rtx reg = gen_reg_rtx (DFmode); + rtx reg; #if HOST_BITS_PER_WIDE_INT >= 64 rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); #else @@ -9410,7 +9680,10 @@ operands[1] = force_reg (DFmode, operands[1]); operands[0] = force_reg (DFmode, operands[0]); - emit_move_insn (reg, gen_lowpart (DFmode, imm)); + imm = gen_lowpart (DFmode, imm); + reg = force_reg (V2DFmode, + gen_rtx_CONST_VECTOR (V2DFmode, + gen_rtvec (2, imm, CONST0_RTX (DFmode)))); emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9428,8 +9701,8 @@ (define_insn "negdf2_ifs" [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "Y,0,*g#Y,*g#Y")) + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym#fr,0,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm,*Y*rm")) (clobber (reg:CC 17))] "!TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9439,8 +9712,8 @@ (define_insn "*negdf2_ifs_rex64" [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y") - (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#f,0"))) - (use (match_operand:DF 2 "general_operand" "Y,0,*g#Y*r")) + (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9451,7 +9724,7 @@ (define_split [(set (match_operand:DF 0 "memory_operand" "") (neg:DF (match_operand:DF 1 "memory_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9461,7 +9734,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0]) && (!TARGET_64BIT || FP_REG_P (operands[0]))" @@ -9472,7 +9745,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9484,14 +9757,18 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") - (neg:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "register_operand" "")) + (neg:DF (match_operand:DF 1 "nonimmediate_operand" ""))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) (xor:TI (subreg:TI (match_dup 1) 0) (subreg:TI (match_dup 2) 0)))] { + operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + /* Avoid possible reformating on the operands. */ + if (TARGET_SSE_PARTIAL_REGS && !optimize_size) + emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0])); if (operands_match_p (operands[0], operands[2])) { rtx tmp; @@ -9724,14 +10001,18 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. */ - rtx reg = gen_reg_rtx (SFmode); + rtx reg = gen_reg_rtx (V4SFmode); rtx dest = operands[0]; + rtx imm; operands[1] = force_reg (SFmode, operands[1]); operands[0] = force_reg (SFmode, operands[0]); - emit_move_insn (reg, - gen_lowpart (SFmode, - gen_int_mode (0x80000000, SImode))); + imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode)); + reg = force_reg (V4SFmode, + gen_rtx_CONST_VECTOR (V4SFmode, + gen_rtvec (4, imm, CONST0_RTX (SFmode), + CONST0_RTX (SFmode), + CONST0_RTX (SFmode)))); emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9748,9 +10029,9 @@ "#") (define_insn "abssf2_ifs" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf") - (abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0"))) - (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x")) + [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf") + (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x,0,0"))) + (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,x*rm,x*rm")) (clobber (reg:CC 17))] "TARGET_SSE && (reload_in_progress || reload_completed @@ -9761,7 +10042,7 @@ (define_split [(set (match_operand:SF 0 "memory_operand" "") (abs:SF (match_operand:SF 1 "memory_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9771,7 +10052,7 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") (abs:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "" "")) + (use (match_operand:V4SF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9780,13 +10061,22 @@ (define_split [(set (match_operand:SF 0 "register_operand" "") - (abs:SF (match_operand:SF 1 "register_operand" ""))) - (use (match_operand:SF 2 "register_operand" "")) + (abs:SF (match_operand:SF 1 "nonimmediate_operand" ""))) + (use (match_operand:V4SF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (and:TI (not:TI (subreg:TI (match_dup 2) 0)) - (subreg:TI (match_dup 1) 0)))]) + (and:TI (subreg:TI (match_dup 1) 0) + (subreg:TI (match_dup 2) 0)))] +{ + if (operands_match_p (operands[0], operands[2])) + { + rtx tmp; + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +}) ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems ;; because of secondary memory needed to reload from class FLOAT_INT_REGS @@ -9849,17 +10139,22 @@ { /* Using SSE is tricky, since we need bitwise negation of -0 in register. */ - rtx reg = gen_reg_rtx (DFmode); + rtx reg = gen_reg_rtx (V2DFmode); #if HOST_BITS_PER_WIDE_INT >= 64 - rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode); + rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode); #else - rtx imm = immed_double_const (0, 0x80000000, DImode); + rtx imm = immed_double_const (~0, ~0x80000000, DImode); #endif rtx dest = operands[0]; operands[1] = force_reg (DFmode, operands[1]); operands[0] = force_reg (DFmode, operands[0]); - emit_move_insn (reg, gen_lowpart (DFmode, imm)); + + /* Produce LONG_DOUBLE with the proper immediate argument. */ + imm = gen_lowpart (DFmode, imm); + reg = force_reg (V2DFmode, + gen_rtx_CONST_VECTOR (V2DFmode, + gen_rtvec (2, imm, CONST0_RTX (DFmode)))); emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg)); if (dest != operands[0]) emit_move_insn (dest, operands[0]); @@ -9876,9 +10171,9 @@ "#") (define_insn "absdf2_ifs" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y")) + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Y*rm,Y*rm")) (clobber (reg:CC 17))] "!TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9887,9 +10182,9 @@ "#") (define_insn "*absdf2_ifs_rex64" - [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr") - (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0"))) - (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y")) + [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr") + (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym,0"))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,*0,*Y*rm")) (clobber (reg:CC 17))] "TARGET_64BIT && TARGET_SSE2 && (reload_in_progress || reload_completed @@ -9900,7 +10195,7 @@ (define_split [(set (match_operand:DF 0 "memory_operand" "") (abs:DF (match_operand:DF 1 "memory_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "" [(parallel [(set (match_dup 0) @@ -9910,7 +10205,7 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (abs:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "" "")) + (use (match_operand:V2DF 2 "" "")) (clobber (reg:CC 17))] "reload_completed && !SSE_REG_P (operands[0])" [(parallel [(set (match_dup 0) @@ -9919,13 +10214,26 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") - (abs:DF (match_operand:DF 1 "register_operand" ""))) - (use (match_operand:DF 2 "register_operand" "")) + (abs:DF (match_operand:DF 1 "nonimmediate_operand" ""))) + (use (match_operand:V2DF 2 "nonimmediate_operand" "")) (clobber (reg:CC 17))] "reload_completed && SSE_REG_P (operands[0])" [(set (subreg:TI (match_dup 0) 0) - (and:TI (not:TI (subreg:TI (match_dup 2) 0)) - (subreg:TI (match_dup 1) 0)))]) + (and:TI (subreg:TI (match_dup 1) 0) + (subreg:TI (match_dup 2) 0)))] +{ + operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); + /* Avoid possible reformating on the operands. */ + if (TARGET_SSE_PARTIAL_REGS && !optimize_size) + emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0])); + if (operands_match_p (operands[0], operands[2])) + { + rtx tmp; + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +}) ;; Keep 'f' and 'r' in separate alternatives to avoid reload problems @@ -16547,6 +16855,12 @@ (clobber (reg:CC 17))] "TARGET_SSE && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) + /* Avoid combine from being smart and converting min/max + instruction patterns into conditional moves. */ + && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT + && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE) + || !rtx_equal_p (operands[4], operands[2]) + || !rtx_equal_p (operands[5], operands[3])) && (!TARGET_IEEE_FP || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" "#") @@ -16574,6 +16888,12 @@ (clobber (reg:CC 17))] "TARGET_SSE2 && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM) + /* Avoid combine from being smart and converting min/max + instruction patterns into conditional moves. */ + && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT + && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE) + || !rtx_equal_p (operands[4], operands[2]) + || !rtx_equal_p (operands[5], operands[3])) && (!TARGET_IEEE_FP || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))" "#") @@ -16635,6 +16955,14 @@ (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0) (subreg:TI (match_dup 7) 0)))] { + if (GET_MODE (operands[2]) == DFmode + && TARGET_SSE_PARTIAL_REGS && !optimize_size) + { + rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + op = gen_rtx_SUBREG (V2DFmode, operands[3], 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } /* If op2 == op3, op3 will be clobbered before it is used. This should be optimized out though. */ if (operands_match_p (operands[2], operands[3])) @@ -16743,6 +17071,20 @@ (set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6) (subreg:TI (match_dup 7) 0)))] { + if (TARGET_SSE_PARTIAL_REGS && !optimize_size + && GET_MODE (operands[2]) == DFmode) + { + if (REG_P (operands[2])) + { + rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } + if (REG_P (operands[3])) + { + rtx op = gen_rtx_SUBREG (V2DFmode, operands[3], 0); + emit_insn (gen_sse2_unpcklpd (op, op, op)); + } + } PUT_MODE (operands[1], GET_MODE (operands[0])); if (!sse_comparison_operator (operands[1], VOIDmode)) { @@ -17849,28 +18191,93 @@ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE" - ;; @@@ let's try to use movaps here. "movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) +(define_split + [(set (match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE" + [(set (match_dup 0) + (vec_merge:V4SF + (vec_duplicate:V4SF (match_dup 1)) + (match_dup 2) + (const_int 1)))] +{ + operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0); + operands[2] = CONST0_RTX (V4SFmode); +}) + (define_insn "movv4si_internal" [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "movv2di_internal" [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m") (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE" - ;; @@@ let's try to use movaps here. - "movdqa\t{%1, %0|%0, %1}" + "TARGET_SSE2" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) + +(define_split + [(set (match_operand:V2DF 0 "register_operand" "") + (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))] + "TARGET_SSE2" + [(set (match_dup 0) + (vec_merge:V2DF + (vec_duplicate:V2DF (match_dup 1)) + (match_dup 2) + (const_int 1)))] +{ + operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0); + operands[2] = CONST0_RTX (V2DFmode); +}) (define_insn "movv8qi_internal" [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") @@ -17920,28 +18327,85 @@ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movapd\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movapd\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V2DF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "V2DF")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "V2DF"))] + (const_string "V2DF")))]) (define_insn "movv8hi_internal" [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "movv16qi_internal" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" +{ + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_expand "movv2df" [(set (match_operand:V2DF 0 "general_operand" "") @@ -18158,26 +18622,83 @@ [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") (match_operand:TI 1 "general_operand" "C,xm,x"))] "TARGET_SSE && !TARGET_64BIT" - "@ - xorps\t%0, %0 - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 1: + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "ssemov,ssemov,ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "2") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI"))] + (const_string "TI")))]) (define_insn "*movti_rex64" - [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") - (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))] + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,O,xm,x"))] "TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "@ - # - # - xorps\t%0, %0 - movaps\\t{%1, %0|%0, %1} - movaps\\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "xorps\t%0, %0"; + else + return "pxor\t%0, %0"; + case 3: + case 4: + if (get_attr_mode (insn) == MODE_V4SF) + return "movaps\t{%1, %0|%0, %1}"; + else + return "movdqa\t{%1, %0|%0, %1}"; + default: + abort (); + } +} [(set_attr "type" "*,*,ssemov,ssemov,ssemov") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_size") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) (define_split [(set (match_operand:TI 0 "nonimmediate_operand" "") @@ -18327,11 +18848,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V4SF")]) -(define_insn "sse_loadss" +(define_expand "sse_loadss" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "memory_operand" "")] + "TARGET_SSE" +{ + emit_insn (gen_sse_loadss_1 (operands[0], operands[1], + CONST0_RTX (V4SFmode))); + DONE; +}) + +(define_insn "sse_loadss_1" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (match_operand:V4SF 1 "memory_operand" "m") - (vec_duplicate:V4SF (float:SF (const_int 0))) + (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m")) + (match_operand:V4SF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE" "movss\t{%1, %0|%0, %1}" @@ -18854,12 +19385,26 @@ ;; this insn. (define_insn "sse_clrv4sf" [(set (match_operand:V4SF 0 "register_operand" "=x") - (unspec:V4SF [(const_int 0)] UNSPEC_NOP))] + (match_operand:V4SF 1 "const0_operand" "X"))] "TARGET_SSE" - "xorps\t{%0, %0|%0, %0}" +{ + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t{%0, %0|%0, %0}"; + else + return "xorps\t{%0, %0|%0, %0}"; +} [(set_attr "type" "sselog") (set_attr "memory" "none") - (set_attr "mode" "V4SF")]) + (set (attr "mode") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_size") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")))]) ;; Use xor, but don't show input operands so they aren't live before ;; this insn. @@ -19091,6 +19636,18 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvtsi2ssq" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (vec_duplicate:V4SF + (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 14)))] + "TARGET_SSE && TARGET_64BIT" + "cvtsi2ssq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF")]) + (define_insn "cvtss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI @@ -20718,7 +21275,7 @@ (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0") (vec_duplicate:V4SF (float_truncate:V2SF - (match_operand:V2DF 2 "register_operand" "xm"))) + (match_operand:V2DF 2 "nonimmediate_operand" "xm"))) (const_int 14)))] "TARGET_SSE2" "cvtsd2ss\t{%2, %0|%0, %2}" @@ -20730,7 +21287,7 @@ (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") (float_extend:V2DF (vec_select:V2SF - (match_operand:V4SF 2 "register_operand" "xm") + (match_operand:V4SF 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 1)]))) (const_int 2)))] @@ -21006,10 +21563,20 @@ (define_insn "sse2_clrti" [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))] "TARGET_SSE2" - "pxor\t{%0, %0|%0, %0}" - [(set_attr "type" "sseiadd") +{ + if (get_attr_mode (insn) == MODE_TI) + return "pxor\t%0, %0"; + else + return "xorps\t%0, %0"; +} + [(set_attr "type" "ssemov") (set_attr "memory" "none") - (set_attr "mode" "TI")]) + (set (attr "mode") + (if_then_else + (ne (symbol_ref "optimize_size") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")))]) ;; MMX unsigned averages/sum of absolute differences @@ -21714,11 +22281,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) -(define_insn "sse2_loadsd" +(define_expand "sse2_loadsd" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1], + CONST0_RTX (V2DFmode))); + DONE; +}) + +(define_insn "sse2_loadsd_1" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_merge:V2DF - (match_operand:DF 1 "memory_operand" "m") - (vec_duplicate:DF (float:DF (const_int 0))) + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")) + (match_operand:V2DF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE2" "movsd\t{%1, %0|%0, %1}" |