aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/i386.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r--gcc/config/i386/i386.md821
1 files changed, 699 insertions, 122 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4596cee7f9e..4ee6b71d382 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2133,12 +2133,12 @@
case 4:
return "mov{l}\t{%1, %0|%0, %1}";
case 5:
- if (TARGET_SSE2 && !TARGET_ATHLON)
+ if (get_attr_mode (insn) == MODE_TI)
return "pxor\t%0, %0";
else
return "xorps\t%0, %0";
case 6:
- if (TARGET_PARTIAL_REG_DEPENDENCY)
+ if (get_attr_mode (insn) == MODE_V4SF)
return "movaps\t{%1, %0|%0, %1}";
else
return "movss\t{%1, %0|%0, %1}";
@@ -2158,7 +2158,40 @@
}
}
[(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
- (set_attr "mode" "SF,SF,SF,SI,SI,TI,SF,SF,SF,SI,SI,DI")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4,9,10")
+ (const_string "SI")
+ (eq_attr "alternative" "5")
+ (if_then_else
+ (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE2")
+ (const_int 0)))
+ (eq (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "TI")
+ (const_string "V4SF"))
+ /* For architectures resolving dependencies on
+ whole SSE registers use APS move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ Do the same for architectures resolving dependencies on
+ the parts. While in DF mode it is better to always handle
+ just register parts, the SF mode is different due to lack
+ of instructions to load just part of the register. It is
+ better to maintain the whole registers in single format
+ to avoid problems on using packed logical operations. */
+ (eq_attr "alternative" "6")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "SF"))
+ (eq_attr "alternative" "11")
+ (const_string "DI")]
+ (const_string "SF")))])
(define_insn "*swapsf"
[(set (match_operand:SF 0 "register_operand" "+f")
@@ -2319,25 +2352,78 @@
case 4:
return "#";
case 5:
- if (TARGET_ATHLON)
- return "xorpd\t%0, %0";
- else
- return "pxor\t%0, %0";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "xorps\t%0, %0";
+ case MODE_V2DF:
+ return "xorpd\t%0, %0";
+ case MODE_TI:
+ return "pxor\t%0, %0";
+ default:
+ abort ();
+ }
case 6:
- if (TARGET_PARTIAL_REG_DEPENDENCY)
- return "movapd\t{%1, %0|%0, %1}";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ return "movsd\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+ case 7:
+ if (get_attr_mode (insn) == MODE_V2DF)
+ return "movlpd\t{%1, %0|%0, %1}";
else
return "movsd\t{%1, %0|%0, %1}";
- case 7:
case 8:
- return "movsd\t{%1, %0|%0, %1}";
+ return "movsd\t{%1, %0|%0, %1}";
default:
abort();
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
- (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4")
+ (const_string "SI")
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")]
+ (const_string "V2DF"))
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")]
+ (const_string "DF"))
+ /* For achitectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
+ (const_int 0))
+ (const_string "V2DF")
+ (const_string "DF"))]
+ (const_string "DF")))])
(define_insn "*movdf_integer"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
@@ -2381,16 +2467,34 @@
return "#";
case 5:
- if (TARGET_ATHLON)
- return "xorpd\t%0, %0";
- else
- return "pxor\t%0, %0";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "xorps\t%0, %0";
+ case MODE_V2DF:
+ return "xorpd\t%0, %0";
+ case MODE_TI:
+ return "pxor\t%0, %0";
+ default:
+ abort ();
+ }
case 6:
- if (TARGET_PARTIAL_REG_DEPENDENCY)
- return "movapd\t{%1, %0|%0, %1}";
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ return "movsd\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+ case 7:
+ if (get_attr_mode (insn) == MODE_V2DF)
+ return "movlpd\t{%1, %0|%0, %1}";
else
return "movsd\t{%1, %0|%0, %1}";
- case 7:
case 8:
return "movsd\t{%1, %0|%0, %1}";
@@ -2399,7 +2503,42 @@
}
}
[(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
- (set_attr "mode" "DF,DF,DF,SI,SI,TI,DF,DF,DF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4")
+ (const_string "SI")
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")]
+ (const_string "V2DF"))
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")]
+ (const_string "DF"))
+ /* For achitectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS")
+ (const_int 0))
+ (const_string "V2DF")
+ (const_string "DF"))]
+ (const_string "DF")))])
(define_split
[(set (match_operand:DF 0 "nonimmediate_operand" "")
@@ -3706,7 +3845,7 @@
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
(clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
- "TARGET_80387 && TARGET_SSE2"
+ "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS"
{
switch (which_alternative)
{
@@ -3716,7 +3855,30 @@
else
return "fst%z0\t%y0";
case 4:
- return "cvtsd2ss\t{%1, %0|%0, %1}";
+ return "#";
+ default:
+ abort ();
+ }
+}
+ [(set_attr "type" "fmov,multi,multi,multi,ssecvt")
+ (set_attr "mode" "SF,SF,SF,SF,DF")])
+
+(define_insn "*truncdfsf2_1_sse_nooverlap"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=*!m,?f#rx,?r#fx,?x#rf,&Y")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f,f,f,f,mY")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m,X"))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+ case 4:
+ return "#";
default:
abort ();
}
@@ -3728,7 +3890,7 @@
[(set (match_operand:SF 0 "nonimmediate_operand" "=Y,!m")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
- "TARGET_80387 && TARGET_SSE2
+ "TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
{
switch (which_alternative)
@@ -3747,7 +3909,30 @@
[(set_attr "type" "ssecvt,fmov")
(set_attr "mode" "DF,SF")])
-(define_insn "truncdfsf2_3"
+(define_insn "*truncdfsf2_2_nooverlap"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=&Y,!m")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "mY,f")))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "#";
+ case 1:
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+ default:
+ abort ();
+ }
+}
+ [(set_attr "type" "ssecvt,fmov")
+ (set_attr "mode" "DF,SF")])
+
+(define_insn "*truncdfsf2_3"
[(set (match_operand:SF 0 "memory_operand" "=m")
(float_truncate:SF
(match_operand:DF 1 "register_operand" "f")))]
@@ -3765,11 +3950,20 @@
[(set (match_operand:SF 0 "register_operand" "=Y")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "mY")))]
- "!TARGET_80387 && TARGET_SSE2"
+ "!TARGET_80387 && TARGET_SSE2 && !TARGET_SSE_PARTIAL_REGS"
"cvtsd2ss\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
+(define_insn "*truncdfsf2_sse_only_nooverlap"
+ [(set (match_operand:SF 0 "register_operand" "=&Y")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "mY")))]
+ "!TARGET_80387 && TARGET_SSE2 && TARGET_SSE_PARTIAL_REGS"
+ "#"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")])
+
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(float_truncate:SF
@@ -3779,15 +3973,56 @@
[(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
"")
+; Avoid possible reformating penalty on the destination by first
+; zeroing it out
(define_split
- [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ [(set (match_operand:SF 0 "register_operand" "")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "")))
(clobber (match_operand 2 "" ""))]
+ "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS
+ && SSE_REG_P (operands[0])
+ && !STACK_REG_P (operands[1])"
+ [(const_int 0)]
+{
+ rtx src, dest;
+ if (!TARGET_SSE_PARTIAL_REGS)
+ emit_insn (gen_truncdfsf2_sse_only (operands[0], operands[1]));
+ else
+ {
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+ /* simplify_gen_subreg refuses to widen memory references. */
+ if (GET_CODE (src) == SUBREG)
+ alter_subreg (&src);
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
+ abort ();
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsd2ss (dest, dest, src));
+ }
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "")))]
"TARGET_80387 && reload_completed
- && !FP_REG_P (operands[0]) && !FP_REG_P (operands[1])"
- [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
- "")
+ && SSE_REG_P (operands[0]) && TARGET_SSE_PARTIAL_REGS"
+ [(const_int 0)]
+{
+ rtx src, dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ src = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+ /* simplify_gen_subreg refuses to widen memory references. */
+ if (GET_CODE (src) == SUBREG)
+ alter_subreg (&src);
+ if (reg_overlap_mentioned_p (operands[0], operands[1]))
+ abort ();
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsd2ss (dest, dest, src));
+ DONE;
+})
(define_split
[(set (match_operand:SF 0 "register_operand" "")
@@ -4491,6 +4726,22 @@
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
+; Avoid possible reformating penalty on the destination by first
+; zeroing it out
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS
+ && SSE_REG_P (operands[0])"
+ [(const_int 0)]
+{
+ rtx dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsi2ss (dest, dest, operands[1]));
+ DONE;
+})
+
(define_expand "floatdisf2"
[(set (match_operand:SF 0 "register_operand" "")
(float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
@@ -4529,6 +4780,22 @@
(set_attr "mode" "SF")
(set_attr "fp_int_src" "true")])
+; Avoid possible reformating penalty on the destination by first
+; zeroing it out
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 && reload_completed && TARGET_SSE_PARTIAL_REGS
+ && SSE_REG_P (operands[0])"
+ [(const_int 0)]
+{
+ rtx dest;
+ dest = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ emit_insn (gen_sse_clrv4sf (dest, CONST0_RTX (V4SFmode)));
+ emit_insn (gen_cvtsi2ssq (dest, dest, operands[1]));
+ DONE;
+})
+
(define_insn "floathidf2"
[(set (match_operand:DF 0 "register_operand" "=f,f")
(float:DF (match_operand:HI 1 "nonimmediate_operand" "m,r")))]
@@ -9266,12 +9533,15 @@
in register. */
rtx reg = gen_reg_rtx (SFmode);
rtx dest = operands[0];
+ rtx imm = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode));
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
- emit_move_insn (reg,
- gen_lowpart (SFmode,
- gen_int_mode (0x80000000, SImode)));
+ reg = force_reg (V4SFmode,
+ gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_rtvec (4, imm, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode))));
emit_insn (gen_negsf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9289,8 +9559,8 @@
(define_insn "negsf2_ifs"
[(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
- (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,x#fr,0,0")))
- (use (match_operand:SF 2 "nonmemory_operand" "x,0#x,*g#x,*g#x"))
+ (neg:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm#fr,0,0")))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,*x*rm,*x*rm"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
@@ -9311,7 +9581,7 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(neg:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "" ""))
+ (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9320,8 +9590,8 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
- (neg:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "register_operand" ""))
+ (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
@@ -9400,7 +9670,7 @@
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
- rtx reg = gen_reg_rtx (DFmode);
+ rtx reg;
#if HOST_BITS_PER_WIDE_INT >= 64
rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
#else
@@ -9410,7 +9680,10 @@
operands[1] = force_reg (DFmode, operands[1]);
operands[0] = force_reg (DFmode, operands[0]);
- emit_move_insn (reg, gen_lowpart (DFmode, imm));
+ imm = gen_lowpart (DFmode, imm);
+ reg = force_reg (V2DFmode,
+ gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_rtvec (2, imm, CONST0_RTX (DFmode))));
emit_insn (gen_negdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9428,8 +9701,8 @@
(define_insn "negdf2_ifs"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,f#Yr,rm#Yf")
- (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0,0")))
- (use (match_operand:DF 2 "nonmemory_operand" "Y,0,*g#Y,*g#Y"))
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym#fr,0,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm,*Y*rm"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9439,8 +9712,8 @@
(define_insn "*negdf2_ifs_rex64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=Y#f,Y#f,fm#Y")
- (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#f,0")))
- (use (match_operand:DF 2 "general_operand" "Y,0,*g#Y*r"))
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y#fr,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,*Y*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9451,7 +9724,7 @@
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(neg:DF (match_operand:DF 1 "memory_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
@@ -9461,7 +9734,7 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])
&& (!TARGET_64BIT || FP_REG_P (operands[0]))"
@@ -9472,7 +9745,7 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(neg:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"TARGET_64BIT && reload_completed && GENERAL_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9484,14 +9757,18 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
- (neg:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "register_operand" ""))
+ (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
(xor:TI (subreg:TI (match_dup 1) 0)
(subreg:TI (match_dup 2) 0)))]
{
+ operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ /* Avoid possible reformating on the operands. */
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
if (operands_match_p (operands[0], operands[2]))
{
rtx tmp;
@@ -9724,14 +10001,18 @@
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
- rtx reg = gen_reg_rtx (SFmode);
+ rtx reg = gen_reg_rtx (V4SFmode);
rtx dest = operands[0];
+ rtx imm;
operands[1] = force_reg (SFmode, operands[1]);
operands[0] = force_reg (SFmode, operands[0]);
- emit_move_insn (reg,
- gen_lowpart (SFmode,
- gen_int_mode (0x80000000, SImode)));
+ imm = gen_lowpart (SFmode, gen_int_mode(~0x80000000, SImode));
+ reg = force_reg (V4SFmode,
+ gen_rtx_CONST_VECTOR (V4SFmode,
+ gen_rtvec (4, imm, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode))));
emit_insn (gen_abssf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9748,9 +10029,9 @@
"#")
(define_insn "abssf2_ifs"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,f#xr,rm#xf")
- (abs:SF (match_operand:SF 1 "nonimmediate_operand" "x,0,0")))
- (use (match_operand:SF 2 "nonmemory_operand" "*0#x,*g#x,*g#x"))
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x#fr,x#fr,f#xr,rm#xf")
+ (abs:SF (match_operand:SF 1 "nonimmediate_operand" "0,x,0,0")))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" "xm,0,x*rm,x*rm"))
(clobber (reg:CC 17))]
"TARGET_SSE
&& (reload_in_progress || reload_completed
@@ -9761,7 +10042,7 @@
(define_split
[(set (match_operand:SF 0 "memory_operand" "")
(abs:SF (match_operand:SF 1 "memory_operand" "")))
- (use (match_operand:SF 2 "" ""))
+ (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
@@ -9771,7 +10052,7 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(abs:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "" ""))
+ (use (match_operand:V4SF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9780,13 +10061,22 @@
(define_split
[(set (match_operand:SF 0 "register_operand" "")
- (abs:SF (match_operand:SF 1 "register_operand" "")))
- (use (match_operand:SF 2 "register_operand" ""))
+ (abs:SF (match_operand:SF 1 "nonimmediate_operand" "")))
+ (use (match_operand:V4SF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
- (and:TI (not:TI (subreg:TI (match_dup 2) 0))
- (subreg:TI (match_dup 1) 0)))])
+ (and:TI (subreg:TI (match_dup 1) 0)
+ (subreg:TI (match_dup 2) 0)))]
+{
+ if (operands_match_p (operands[0], operands[2]))
+ {
+ rtx tmp;
+ tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+})
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
;; because of secondary memory needed to reload from class FLOAT_INT_REGS
@@ -9849,17 +10139,22 @@
{
/* Using SSE is tricky, since we need bitwise negation of -0
in register. */
- rtx reg = gen_reg_rtx (DFmode);
+ rtx reg = gen_reg_rtx (V2DFmode);
#if HOST_BITS_PER_WIDE_INT >= 64
- rtx imm = gen_int_mode (((HOST_WIDE_INT)1) << 63, DImode);
+ rtx imm = gen_int_mode (~(((HOST_WIDE_INT)1) << 63), DImode);
#else
- rtx imm = immed_double_const (0, 0x80000000, DImode);
+ rtx imm = immed_double_const (~0, ~0x80000000, DImode);
#endif
rtx dest = operands[0];
operands[1] = force_reg (DFmode, operands[1]);
operands[0] = force_reg (DFmode, operands[0]);
- emit_move_insn (reg, gen_lowpart (DFmode, imm));
+
+ /* Produce LONG_DOUBLE with the proper immediate argument. */
+ imm = gen_lowpart (DFmode, imm);
+ reg = force_reg (V2DFmode,
+ gen_rtx_CONST_VECTOR (V2DFmode,
+ gen_rtvec (2, imm, CONST0_RTX (DFmode))));
emit_insn (gen_absdf2_ifs (operands[0], operands[1], reg));
if (dest != operands[0])
emit_move_insn (dest, operands[0]);
@@ -9876,9 +10171,9 @@
"#")
(define_insn "absdf2_ifs"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr,mr#Yf")
- (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0,0")))
- (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y,*g#Y"))
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr,mr#Yf")
+ (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Y,0,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,0,Y*rm,Y*rm"))
(clobber (reg:CC 17))]
"!TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9887,9 +10182,9 @@
"#")
(define_insn "*absdf2_ifs_rex64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,mf#Yr")
- (abs:DF (match_operand:DF 1 "nonimmediate_operand" "Y,0")))
- (use (match_operand:DF 2 "nonmemory_operand" "*0#Y,*g#Y"))
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=Y#fr,Y#fr,mf#Yr")
+ (abs:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym,0")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" "Ym,*0,*Y*rm"))
(clobber (reg:CC 17))]
"TARGET_64BIT && TARGET_SSE2
&& (reload_in_progress || reload_completed
@@ -9900,7 +10195,7 @@
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(abs:DF (match_operand:DF 1 "memory_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
""
[(parallel [(set (match_dup 0)
@@ -9910,7 +10205,7 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(abs:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "" ""))
+ (use (match_operand:V2DF 2 "" ""))
(clobber (reg:CC 17))]
"reload_completed && !SSE_REG_P (operands[0])"
[(parallel [(set (match_dup 0)
@@ -9919,13 +10214,26 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
- (abs:DF (match_operand:DF 1 "register_operand" "")))
- (use (match_operand:DF 2 "register_operand" ""))
+ (abs:DF (match_operand:DF 1 "nonimmediate_operand" "")))
+ (use (match_operand:V2DF 2 "nonimmediate_operand" ""))
(clobber (reg:CC 17))]
"reload_completed && SSE_REG_P (operands[0])"
[(set (subreg:TI (match_dup 0) 0)
- (and:TI (not:TI (subreg:TI (match_dup 2) 0))
- (subreg:TI (match_dup 1) 0)))])
+ (and:TI (subreg:TI (match_dup 1) 0)
+ (subreg:TI (match_dup 2) 0)))]
+{
+ operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ /* Avoid possible reformating on the operands. */
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], operands[0]));
+ if (operands_match_p (operands[0], operands[2]))
+ {
+ rtx tmp;
+ tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+})
;; Keep 'f' and 'r' in separate alternatives to avoid reload problems
@@ -16547,6 +16855,12 @@
(clobber (reg:CC 17))]
"TARGET_SSE
&& (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)
+ /* Avoid combine from being smart and converting min/max
+ instruction patterns into conditional moves. */
+ && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT
+ && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE)
+ || !rtx_equal_p (operands[4], operands[2])
+ || !rtx_equal_p (operands[5], operands[3]))
&& (!TARGET_IEEE_FP
|| (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))"
"#")
@@ -16574,6 +16888,12 @@
(clobber (reg:CC 17))]
"TARGET_SSE2
&& (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)
+ /* Avoid combine from being smart and converting min/max
+ instruction patterns into conditional moves. */
+ && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT
+ && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE)
+ || !rtx_equal_p (operands[4], operands[2])
+ || !rtx_equal_p (operands[5], operands[3]))
&& (!TARGET_IEEE_FP
|| (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))"
"#")
@@ -16635,6 +16955,14 @@
(set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0)
(subreg:TI (match_dup 7) 0)))]
{
+ if (GET_MODE (operands[2]) == DFmode
+ && TARGET_SSE_PARTIAL_REGS && !optimize_size)
+ {
+ rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ op = gen_rtx_SUBREG (V2DFmode, operands[3], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
/* If op2 == op3, op3 will be clobbered before it is used.
This should be optimized out though. */
if (operands_match_p (operands[2], operands[3]))
@@ -16743,6 +17071,20 @@
(set (subreg:TI (match_dup 0) 0) (and:TI (match_dup 6)
(subreg:TI (match_dup 7) 0)))]
{
+ if (TARGET_SSE_PARTIAL_REGS && !optimize_size
+ && GET_MODE (operands[2]) == DFmode)
+ {
+ if (REG_P (operands[2]))
+ {
+ rtx op = gen_rtx_SUBREG (V2DFmode, operands[2], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
+ if (REG_P (operands[3]))
+ {
+ rtx op = gen_rtx_SUBREG (V2DFmode, operands[3], 0);
+ emit_insn (gen_sse2_unpcklpd (op, op, op));
+ }
+ }
PUT_MODE (operands[1], GET_MODE (operands[0]));
if (!sse_comparison_operator (operands[1], VOIDmode))
{
@@ -17849,28 +18191,93 @@
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
- ;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
+(define_split
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE"
+ [(set (match_dup 0)
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF (match_dup 1))
+ (match_dup 2)
+ (const_int 1)))]
+{
+ operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
+ operands[2] = CONST0_RTX (V4SFmode);
+})
+
(define_insn "movv4si_internal"
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_insn "movv2di_internal"
[(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m")
(match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))]
- "TARGET_SSE"
- ;; @@@ let's try to use movaps here.
- "movdqa\t{%1, %0|%0, %1}"
+ "TARGET_SSE2"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
+
+(define_split
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE2"
+ [(set (match_dup 0)
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF (match_dup 1))
+ (match_dup 2)
+ (const_int 1)))]
+{
+ operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
+ operands[2] = CONST0_RTX (V2DFmode);
+})
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
@@ -17920,28 +18327,85 @@
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
(match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movapd\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movapd\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V2DF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "V2DF"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "V2DF"))]
+ (const_string "V2DF")))])
(define_insn "movv8hi_internal"
[(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
(match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_insn "movv16qi_internal"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
(match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+{
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+}
[(set_attr "type" "ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_expand "movv2df"
[(set (match_operand:V2DF 0 "general_operand" "")
@@ -18158,26 +18622,83 @@
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
(match_operand:TI 1 "general_operand" "C,xm,x"))]
"TARGET_SSE && !TARGET_64BIT"
- "@
- xorps\t%0, %0
- movaps\t{%1, %0|%0, %1}
- movaps\t{%1, %0|%0, %1}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 1:
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+}
[(set_attr "type" "ssemov,ssemov,ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "2")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "TI")))])
(define_insn "*movti_rex64"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
- (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm")
+ (match_operand:TI 1 "general_operand" "riFo,riF,O,xm,x"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
- "@
- #
- #
- xorps\t%0, %0
- movaps\\t{%1, %0|%0, %1}
- movaps\\t{%1, %0|%0, %1}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "#";
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 3:
+ case 4:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "movaps\t{%1, %0|%0, %1}";
+ else
+ return "movdqa\t{%1, %0|%0, %1}";
+ default:
+ abort ();
+ }
+}
[(set_attr "type" "*,*,ssemov,ssemov,ssemov")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2,3")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "4")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "DI")))])
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand" "")
@@ -18327,11 +18848,21 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
-(define_insn "sse_loadss"
+(define_expand "sse_loadss"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:SF 1 "memory_operand" "")]
+ "TARGET_SSE"
+{
+ emit_insn (gen_sse_loadss_1 (operands[0], operands[1],
+ CONST0_RTX (V4SFmode)));
+ DONE;
+})
+
+(define_insn "sse_loadss_1"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (match_operand:V4SF 1 "memory_operand" "m")
- (vec_duplicate:V4SF (float:SF (const_int 0)))
+ (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m"))
+ (match_operand:V4SF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE"
"movss\t{%1, %0|%0, %1}"
@@ -18854,12 +19385,26 @@
;; this insn.
(define_insn "sse_clrv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
- (unspec:V4SF [(const_int 0)] UNSPEC_NOP))]
+ (match_operand:V4SF 1 "const0_operand" "X"))]
"TARGET_SSE"
- "xorps\t{%0, %0|%0, %0}"
+{
+ if (get_attr_mode (insn) == MODE_TI)
+ return "pxor\t{%0, %0|%0, %0}";
+ else
+ return "xorps\t{%0, %0|%0, %0}";
+}
[(set_attr "type" "sselog")
(set_attr "memory" "none")
- (set_attr "mode" "V4SF")])
+ (set (attr "mode")
+ (if_then_else
+ (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE2")
+ (const_int 0)))
+ (eq (symbol_ref "optimize_size")
+ (const_int 0)))
+ (const_string "TI")
+ (const_string "V4SF")))])
;; Use xor, but don't show input operands so they aren't live before
;; this insn.
@@ -19091,6 +19636,18 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SF")])
+(define_insn "cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (const_int 14)))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")])
+
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI
@@ -20718,7 +21275,7 @@
(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
(vec_duplicate:V4SF
(float_truncate:V2SF
- (match_operand:V2DF 2 "register_operand" "xm")))
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
(const_int 14)))]
"TARGET_SSE2"
"cvtsd2ss\t{%2, %0|%0, %2}"
@@ -20730,7 +21287,7 @@
(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 2 "register_operand" "xm")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)
(const_int 1)])))
(const_int 2)))]
@@ -21006,10 +21563,20 @@
(define_insn "sse2_clrti"
[(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
"TARGET_SSE2"
- "pxor\t{%0, %0|%0, %0}"
- [(set_attr "type" "sseiadd")
+{
+ if (get_attr_mode (insn) == MODE_TI)
+ return "pxor\t%0, %0";
+ else
+ return "xorps\t%0, %0";
+}
+ [(set_attr "type" "ssemov")
(set_attr "memory" "none")
- (set_attr "mode" "TI")])
+ (set (attr "mode")
+ (if_then_else
+ (ne (symbol_ref "optimize_size")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI")))])
;; MMX unsigned averages/sum of absolute differences
@@ -21714,11 +22281,21 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
-(define_insn "sse2_loadsd"
+(define_expand "sse2_loadsd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" "")]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1],
+ CONST0_RTX (V2DFmode)));
+ DONE;
+})
+
+(define_insn "sse2_loadsd_1"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
- (match_operand:DF 1 "memory_operand" "m")
- (vec_duplicate:DF (float:DF (const_int 0)))
+ (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m"))
+ (match_operand:V2DF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE2"
"movsd\t{%1, %0|%0, %1}"