Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r--   gcc/config/i386/i386.md | 735
1 file changed, 503 insertions(+), 232 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index befbfe49569..1fa29985fb9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1,5 +1,6 @@
;; GCC machine description for IA-32 and x86-64.
-;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003
;; Free Software Foundation, Inc.
;; Mostly by William Schelter.
;; x86_64 support added by Jan Hubicka
@@ -267,6 +268,8 @@
(define_attr "length" ""
(cond [(eq_attr "type" "other,multi,fistp")
(const_int 16)
+ (eq_attr "type" "fcmp")
+ (const_int 4)
(eq_attr "unit" "i387")
(plus (const_int 2)
(plus (attr "prefix_data16")
@@ -1099,25 +1102,20 @@
(set_attr "mode" "SI")
(set_attr "length_immediate" "1")])
-; The first alternative is used only to compute proper length of instruction.
-; Reload's algorithm does not take into account the cost of spill instructions
-; needed to free register in given class, so avoid it from choosing the first
-; alternative when eax is not available.
-
(define_insn "*movsi_1"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y")
- (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!*Y,!rm")
+ (match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,*Y,rm,*Y"))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
{
switch (get_attr_type (insn))
{
case TYPE_SSEMOV:
- if (get_attr_mode (insn) == TImode)
+ if (get_attr_mode (insn) == MODE_TI)
return "movdqa\t{%1, %0|%0, %1}";
return "movd\t{%1, %0|%0, %1}";
case TYPE_MMXMOV:
- if (get_attr_mode (insn) == DImode)
+ if (get_attr_mode (insn) == MODE_DI)
return "movq\t{%1, %0|%0, %1}";
return "movd\t{%1, %0|%0, %1}";
@@ -1131,40 +1129,38 @@
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "4,5,6")
+ (cond [(eq_attr "alternative" "2,3,4")
(const_string "mmxmov")
- (eq_attr "alternative" "7,8,9")
+ (eq_attr "alternative" "5,6,7")
(const_string "ssemov")
(and (ne (symbol_ref "flag_pic") (const_int 0))
(match_operand:SI 1 "symbolic_operand" ""))
(const_string "lea")
]
(const_string "imov")))
- (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*")
- (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")])
+ (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")])
;; Stores and loads of ax to arbitrary constant address.
;; We fake a second form of instruction to force reload to load address
;; into register when rax is not available
(define_insn "*movabssi_1_rex64"
- [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r"))
- (match_operand:SI 1 "nonmemory_operand" "a,er,i"))]
- "TARGET_64BIT"
+ [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:SI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
"@
movabs{l}\t{%1, %P0|%P0, %1}
- mov{l}\t{%1, %a0|%a0, %1}
- movabs{l}\t{%1, %a0|%a0, %1}"
+ mov{l}\t{%1, %a0|%a0, %1}"
[(set_attr "type" "imov")
- (set_attr "modrm" "0,*,*")
- (set_attr "length_address" "8,0,0")
- (set_attr "length_immediate" "0,*,*")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
(set_attr "memory" "store")
(set_attr "mode" "SI")])
(define_insn "*movabssi_2_rex64"
[(set (match_operand:SI 0 "register_operand" "=a,r")
(mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
- "TARGET_64BIT"
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
"@
movabs{l}\t{%P1, %0|%0, %P1}
mov{l}\t{%a1, %0|%0, %a1}"
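The movabs patterns here and below drop the fake third alternative and instead gate on a new ix86_check_movabs helper in i386.c. The following is only a plausible sketch of such a check (the name and the operand-number argument come from the diff; the body is an assumption): it digs the MEM out of the SET side named by opnum and refuses volatile memory references.

/* Plausible sketch only - the real ix86_check_movabs in i386.c may differ.  */
static int
ix86_check_movabs_sketch (rtx insn, int opnum)
{
  rtx set = PATTERN (insn);
  rtx mem;

  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();
  mem = XEXP (set, opnum);      /* 0 = store destination, 1 = load source */
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  if (GET_CODE (mem) != MEM)
    abort ();
  return volatile_ok || !MEM_VOLATILE_P (mem);
}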
@@ -1214,14 +1210,9 @@
[(set_attr "type" "push")
(set_attr "mode" "QI")])
-; The first alternative is used only to compute proper length of instruction.
-; Reload's algorithm does not take into account the cost of spill instructions
-; needed to free register in given class, so avoid it from choosing the first
-; alternative when eax is not available.
-
(define_insn "*movhi_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=*?a,r,r,*?a,r,m")
- (match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
{
switch (get_attr_type (insn))
@@ -1238,59 +1229,57 @@
}
}
[(set (attr "type")
- (cond [(and (eq_attr "alternative" "0,1")
+ (cond [(and (eq_attr "alternative" "0")
(ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
(const_int 0))
(eq (symbol_ref "TARGET_HIMODE_MATH")
(const_int 0))))
(const_string "imov")
- (and (eq_attr "alternative" "2,3,4")
+ (and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand" ""))
(const_string "imov")
(and (ne (symbol_ref "TARGET_MOVX")
(const_int 0))
- (eq_attr "alternative" "0,1,3,4"))
+ (eq_attr "alternative" "0,2"))
(const_string "imovx")
]
(const_string "imov")))
(set (attr "mode")
(cond [(eq_attr "type" "imovx")
(const_string "SI")
- (and (eq_attr "alternative" "2,3,4")
+ (and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand" ""))
(const_string "SI")
- (and (eq_attr "alternative" "0,1")
+ (and (eq_attr "alternative" "0")
(ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
(const_int 0))
(eq (symbol_ref "TARGET_HIMODE_MATH")
(const_int 0))))
(const_string "SI")
]
- (const_string "HI")))
- (set_attr "modrm" "0,*,*,0,*,*")])
+ (const_string "HI")))])
;; Stores and loads of ax to arbitrary constant address.
;; We fake a second form of instruction to force reload to load address
;; into register when rax is not available
(define_insn "*movabshi_1_rex64"
- [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r"))
- (match_operand:HI 1 "nonmemory_operand" "a,er,i"))]
- "TARGET_64BIT"
+ [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:HI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
"@
movabs{w}\t{%1, %P0|%P0, %1}
- mov{w}\t{%1, %a0|%a0, %1}
- movabs{w}\t{%1, %a0|%a0, %1}"
+ mov{w}\t{%1, %a0|%a0, %1}"
[(set_attr "type" "imov")
- (set_attr "modrm" "0,*,*")
- (set_attr "length_address" "8,0,0")
- (set_attr "length_immediate" "0,*,*")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
(set_attr "memory" "store")
(set_attr "mode" "HI")])
(define_insn "*movabshi_2_rex64"
[(set (match_operand:HI 0 "register_operand" "=a,r")
(mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
- "TARGET_64BIT"
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
"@
movabs{w}\t{%P1, %0|%0, %P1}
mov{w}\t{%a1, %0|%0, %a1}"
@@ -1488,7 +1477,7 @@
(define_expand "movstrictqi"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
(match_operand:QI 1 "general_operand" ""))]
- "! TARGET_PARTIAL_REG_STALL"
+ "! TARGET_PARTIAL_REG_STALL || optimize_size"
{
/* Don't generate memory->memory moves, go through a register. */
if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
@@ -1498,7 +1487,7 @@
(define_insn "*movstrictqi_1"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
(match_operand:QI 1 "general_operand" "*qn,m"))]
- "! TARGET_PARTIAL_REG_STALL
+ "(! TARGET_PARTIAL_REG_STALL || optimize_size)
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"mov{b}\t{%1, %0|%0, %1}"
[(set_attr "type" "imov")
@@ -1592,24 +1581,23 @@
;; We fake a second form of instruction to force reload to load address
;; into register when rax is not available
(define_insn "*movabsqi_1_rex64"
- [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r"))
- (match_operand:QI 1 "nonmemory_operand" "a,er,i"))]
- "TARGET_64BIT"
+ [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:QI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
"@
movabs{b}\t{%1, %P0|%P0, %1}
- mov{b}\t{%1, %a0|%a0, %1}
- movabs{b}\t{%1, %a0|%a0, %1}"
+ mov{b}\t{%1, %a0|%a0, %1}"
[(set_attr "type" "imov")
- (set_attr "modrm" "0,*,*")
- (set_attr "length_address" "8,0,0")
- (set_attr "length_immediate" "0,*,*")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
(set_attr "memory" "store")
(set_attr "mode" "QI")])
(define_insn "*movabsqi_2_rex64"
[(set (match_operand:QI 0 "register_operand" "=a,r")
(mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
- "TARGET_64BIT"
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
"@
movabs{b}\t{%P1, %0|%0, %P1}
mov{b}\t{%a1, %0|%0, %a1}"
@@ -1904,7 +1892,7 @@
[(set (attr "type")
(cond [(eq_attr "alternative" "5,6")
(const_string "mmxmov")
- (eq_attr "alternative" "7,8")
+ (eq_attr "alternative" "7,8,9")
(const_string "ssemov")
(eq_attr "alternative" "4")
(const_string "multi")
@@ -1921,24 +1909,23 @@
;; We fake a second form of instruction to force reload to load address
;; into register when rax is not available
(define_insn "*movabsdi_1_rex64"
- [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r"))
- (match_operand:DI 1 "nonmemory_operand" "a,er,i"))]
- "TARGET_64BIT"
+ [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:DI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
"@
movabs{q}\t{%1, %P0|%P0, %1}
- mov{q}\t{%1, %a0|%a0, %1}
- movabs{q}\t{%1, %a0|%a0, %1}"
+ mov{q}\t{%1, %a0|%a0, %1}"
[(set_attr "type" "imov")
- (set_attr "modrm" "0,*,*")
- (set_attr "length_address" "8,0,0")
- (set_attr "length_immediate" "0,*,*")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
(set_attr "memory" "store")
(set_attr "mode" "DI")])
(define_insn "*movabsdi_2_rex64"
[(set (match_operand:DI 0 "register_operand" "=a,r")
(mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
- "TARGET_64BIT"
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
"@
movabs{q}\t{%P1, %0|%0, %P1}
mov{q}\t{%a1, %0|%0, %a1}"
@@ -12839,9 +12826,9 @@
(set_attr "modrm" "0")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124)))
+ (const_int 128)))
(const_int 2)
(const_int 6)))])
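The displacement-range change here (repeated in the identical hunks below) follows from rel8 encoding arithmetic: the short form of these jumps is 2 bytes and its 8-bit displacement is counted from the end of those 2 bytes. A worked check, reading (minus (match_dup 0) (pc)) as target minus the start of the jump insn (a hedged reading):

/* offset = target - start_of_jump; the short form's displacement is
   offset - 2 and must fit in a signed byte.  */
static int
fits_short_jump (long offset)
{
  long disp = offset - 2;              /* rel8 is relative to insn end */
  return disp >= -128 && disp <= 127;  /* i.e. offset in [-126, 129] */
}

The old lower bound of -128 therefore admitted targets whose displacement is -130, which cannot be encoded; the new bounds [-126, 128) stay inside the encodable range (conservatively so at the top).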
@@ -12857,9 +12844,9 @@
(set_attr "modrm" "0")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124)))
+ (const_int 128)))
(const_int 2)
(const_int 6)))])
@@ -13124,9 +13111,9 @@
[(set_attr "type" "ibr")
(set (attr "length")
(if_then_else (and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124)))
+ (const_int 128)))
(const_int 2)
(const_int 5)))
(set_attr "modrm" "0")])
@@ -13250,9 +13237,9 @@
(set (attr "length")
(if_then_else (and (eq_attr "alternative" "0")
(and (ge (minus (match_dup 0) (pc))
- (const_int -128))
+ (const_int -126))
(lt (minus (match_dup 0) (pc))
- (const_int 124))))
+ (const_int 128))))
(const_int 2)
(const_int 16)))
;; We don't know the type before shorten branches. Optimistically expect
@@ -13616,11 +13603,10 @@
"ix86_expand_epilogue (0); DONE;")
(define_expand "eh_return"
- [(use (match_operand 0 "register_operand" ""))
- (use (match_operand 1 "register_operand" ""))]
+ [(use (match_operand 0 "register_operand" ""))]
""
{
- rtx tmp, sa = operands[0], ra = operands[1];
+ rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
/* Tricky bit: we write the address of the handler to which we will
be returning into someone else's stack frame, one word below the
@@ -13682,7 +13668,7 @@
(define_expand "ffssi2"
[(set (match_operand:SI 0 "nonimmediate_operand" "")
- (ffs:SI (match_operand:SI 1 "general_operand" "")))]
+ (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
""
{
rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode);
@@ -14823,7 +14809,7 @@
(define_insn "cosxf2"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
- "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+ "!TARGET_64BIT && ! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& flag_unsafe_math_optimizations"
"fcos"
[(set_attr "type" "fpspc")
@@ -16734,7 +16720,7 @@
(define_split
[(set (match_operand 0 "register_operand" "")
(if_then_else (match_operator 1 "comparison_operator"
- [(match_operand 4 "register_operand" "")
+ [(match_operand 4 "nonimmediate_operand" "")
(match_operand 5 "nonimmediate_operand" "")])
(match_operand 2 "nonmemory_operand" "")
(match_operand 3 "nonmemory_operand" "")))]
@@ -16746,13 +16732,16 @@
(subreg:TI (match_dup 7) 0)))]
{
PUT_MODE (operands[1], GET_MODE (operands[0]));
- if (!sse_comparison_operator (operands[1], VOIDmode))
+ if (!sse_comparison_operator (operands[1], VOIDmode)
+ || !rtx_equal_p (operands[0], operands[4]))
{
rtx tmp = operands[5];
operands[5] = operands[4];
operands[4] = tmp;
PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1])));
}
+ if (!rtx_equal_p (operands[0], operands[4]))
+ abort ();
if (const0_operand (operands[2], GET_MODE (operands[0])))
{
operands[7] = operands[3];
@@ -16853,6 +16842,10 @@
operands[2] = gen_lowpart (SImode, operands[2]);
PUT_MODE (operands[3], SImode);")
+; Promote the QImode tests, as i386 has encoding of the AND
+; instruction with 32-bit sign-extended immediate and thus the
+; instruction size is unchanged, except in the %eax case for
+; which it is increased by one byte, hence the ! optimize_size.
(define_split
[(set (reg 17)
(compare (and (match_operand 1 "aligned_operand" "")
@@ -16861,12 +16854,11 @@
(set (match_operand 0 "register_operand" "")
(and (match_dup 1) (match_dup 2)))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
- && ix86_match_ccmode (insn, CCNOmode)
- && (GET_MODE (operands[0]) == HImode
- || (GET_MODE (operands[0]) == QImode
- /* Ensure that the operand will remain sign extended immediate. */
- && INTVAL (operands[2]) >= 0
- && (TARGET_PROMOTE_QImode || optimize_size)))"
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)
+ && ! optimize_size
+ && ((GET_MODE (operands[0]) == HImode && ! TARGET_FAST_PREFIX)
+ || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
[(parallel [(set (reg:CCNO 17)
(compare:CCNO (and:SI (match_dup 1) (match_dup 2))
(const_int 0)))
@@ -16879,17 +16871,20 @@
operands[0] = gen_lowpart (SImode, operands[0]);
operands[1] = gen_lowpart (SImode, operands[1]);")
-; Don't promote the QImode tests, as i386 don't have encoding of
-; the test instruction with 32bit sign extended immediate and thus
-; the code grows.
+; Don't promote the QImode tests, as i386 doesn't have encoding of
+; the TEST instruction with 32-bit sign-extended immediate and thus
+; the instruction size would at least double, which is not what we
+; want even with ! optimize_size.
(define_split
[(set (reg 17)
(compare (and (match_operand:HI 0 "aligned_operand" "")
(match_operand:HI 1 "const_int_operand" ""))
(const_int 0)))]
"! TARGET_PARTIAL_REG_STALL && reload_completed
- && ix86_match_ccmode (insn, CCNOmode)
- && GET_MODE (operands[0]) == HImode"
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)
+ && ! TARGET_FAST_PREFIX
+ && ! optimize_size"
[(set (reg:CCNO 17)
(compare:CCNO (and:SI (match_dup 0) (match_dup 1))
(const_int 0)))]
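The two comments above are about raw instruction sizes; for reference, the register-form byte counts behind them (from the IA-32 encoding tables) are tabulated below.

/* Byte counts for the register forms involved (opcode maps in parens).  */
struct insn_size { const char *form; int bytes; };

static const struct insn_size and_test_sizes[] = {
  { "and  $imm8,  %al    (24 ib)",    2 },
  { "and  $imm8,  %bl    (80 /4 ib)", 3 },
  { "and  $imm8s, %ebx   (83 /4 ib)", 3 },  /* sign-extended imm8 */
  { "test $imm8,  %bl    (F6 /0 ib)", 3 },
  { "test $imm32, %ebx   (F7 /0 id)", 6 },  /* TEST has no imm8 form */
};

Promoting an AND-based test to SImode is therefore size-neutral except for the one-byte-opcode %al form (hence the !optimize_size guard in the first split), while promoting a QImode TEST always pays for a full 32-bit immediate, which is why the second split leaves it alone.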
@@ -17848,67 +17843,92 @@
;; Moves for SSE/MMX regs.
(define_insn "movv4sf_internal"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
- (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
(define_insn "movv4si_internal"
- [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
+ [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
(define_insn "movv2di_internal"
- [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))]
+ [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
- "movdqa\t{%1, %0|%0, %1}"
+ "@
+ pxor\t%0, %0
+ movdqa\t{%1, %0|%0, %1}
+ movdqa\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
(define_insn "movv8qi_internal"
- [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_MMX"
- "movq\t{%1, %0|%0, %1}"
+ [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxmov")
(set_attr "mode" "DI")])
(define_insn "movv4hi_internal"
- [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_MMX"
- "movq\t{%1, %0|%0, %1}"
+ [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V4HI 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxmov")
(set_attr "mode" "DI")])
(define_insn "movv2si_internal"
- [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
- (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_MMX"
- "movq\t{%1, %0|%0, %1}"
+ [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V2SI 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_MMX
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxcvt")
(set_attr "mode" "DI")])
(define_insn "movv2sf_internal"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
- (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
- "TARGET_3DNOW"
- "movq\\t{%1, %0|%0, %1}"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m")
+ (match_operand:V2SF 1 "vector_move_operand" "C,ym,y"))]
+ "TARGET_3DNOW
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmxcvt")
(set_attr "mode" "DI")])
(define_expand "movti"
- [(set (match_operand:TI 0 "general_operand" "")
- (match_operand:TI 1 "general_operand" ""))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "nonimmediate_operand" ""))]
"TARGET_SSE || TARGET_64BIT"
{
if (TARGET_64BIT)
@@ -17919,35 +17939,44 @@
})
(define_insn "movv2df_internal"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))]
- "TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movapd\t{%1, %0|%0, %1}"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ xorpd\t%0, %0
+ movapd\t{%1, %0|%0, %1}
+ movapd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V2DF")])
(define_insn "movv8hi_internal"
- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))]
- "TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
(define_insn "movv16qi_internal"
- [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
- (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))]
- "TARGET_SSE2"
- ;; @@@ let's try to use movaps here.
- "movaps\t{%1, %0|%0, %1}"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "@
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
(define_expand "movv2df"
- [(set (match_operand:V2DF 0 "general_operand" "")
- (match_operand:V2DF 1 "general_operand" ""))]
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" ""))]
"TARGET_SSE2"
{
ix86_expand_vector_move (V2DFmode, operands);
@@ -17955,8 +17984,8 @@
})
(define_expand "movv8hi"
- [(set (match_operand:V8HI 0 "general_operand" "")
- (match_operand:V8HI 1 "general_operand" ""))]
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))]
"TARGET_SSE2"
{
ix86_expand_vector_move (V8HImode, operands);
@@ -17964,8 +17993,8 @@
})
(define_expand "movv16qi"
- [(set (match_operand:V16QI 0 "general_operand" "")
- (match_operand:V16QI 1 "general_operand" ""))]
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
+ (match_operand:V16QI 1 "nonimmediate_operand" ""))]
"TARGET_SSE2"
{
ix86_expand_vector_move (V16QImode, operands);
@@ -17973,8 +18002,8 @@
})
(define_expand "movv4sf"
- [(set (match_operand:V4SF 0 "general_operand" "")
- (match_operand:V4SF 1 "general_operand" ""))]
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (match_operand:V4SF 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
ix86_expand_vector_move (V4SFmode, operands);
@@ -17982,8 +18011,8 @@
})
(define_expand "movv4si"
- [(set (match_operand:V4SI 0 "general_operand" "")
- (match_operand:V4SI 1 "general_operand" ""))]
+ [(set (match_operand:V4SI 0 "nonimmediate_operand" "")
+ (match_operand:V4SI 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
ix86_expand_vector_move (V4SImode, operands);
@@ -17991,8 +18020,8 @@
})
(define_expand "movv2di"
- [(set (match_operand:V2DI 0 "general_operand" "")
- (match_operand:V2DI 1 "general_operand" ""))]
+ [(set (match_operand:V2DI 0 "nonimmediate_operand" "")
+ (match_operand:V2DI 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
ix86_expand_vector_move (V2DImode, operands);
@@ -18000,8 +18029,8 @@
})
(define_expand "movv2si"
- [(set (match_operand:V2SI 0 "general_operand" "")
- (match_operand:V2SI 1 "general_operand" ""))]
+ [(set (match_operand:V2SI 0 "nonimmediate_operand" "")
+ (match_operand:V2SI 1 "nonimmediate_operand" ""))]
"TARGET_MMX"
{
ix86_expand_vector_move (V2SImode, operands);
@@ -18009,8 +18038,8 @@
})
(define_expand "movv4hi"
- [(set (match_operand:V4HI 0 "general_operand" "")
- (match_operand:V4HI 1 "general_operand" ""))]
+ [(set (match_operand:V4HI 0 "nonimmediate_operand" "")
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))]
"TARGET_MMX"
{
ix86_expand_vector_move (V4HImode, operands);
@@ -18018,8 +18047,8 @@
})
(define_expand "movv8qi"
- [(set (match_operand:V8QI 0 "general_operand" "")
- (match_operand:V8QI 1 "general_operand" ""))]
+ [(set (match_operand:V8QI 0 "nonimmediate_operand" "")
+ (match_operand:V8QI 1 "nonimmediate_operand" ""))]
"TARGET_MMX"
{
ix86_expand_vector_move (V8QImode, operands);
@@ -18027,14 +18056,97 @@
})
(define_expand "movv2sf"
- [(set (match_operand:V2SF 0 "general_operand" "")
- (match_operand:V2SF 1 "general_operand" ""))]
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" ""))]
"TARGET_3DNOW"
{
ix86_expand_vector_move (V2SFmode, operands);
DONE;
})
+(define_insn "*pushv2df"
+ [(set (match_operand:V2DF 0 "push_operand" "=<")
+ (match_operand:V2DF 1 "register_operand" "x"))]
+ "TARGET_SSE"
+ "#")
+
+(define_insn "*pushv2di"
+ [(set (match_operand:V2DI 0 "push_operand" "=<")
+ (match_operand:V2DI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv8hi"
+ [(set (match_operand:V8HI 0 "push_operand" "=<")
+ (match_operand:V8HI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv16qi"
+ [(set (match_operand:V16QI 0 "push_operand" "=<")
+ (match_operand:V16QI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv4sf"
+ [(set (match_operand:V4SF 0 "push_operand" "=<")
+ (match_operand:V4SF 1 "register_operand" "x"))]
+ "TARGET_SSE"
+ "#")
+
+(define_insn "*pushv4si"
+ [(set (match_operand:V4SI 0 "push_operand" "=<")
+ (match_operand:V4SI 1 "register_operand" "x"))]
+ "TARGET_SSE2"
+ "#")
+
+(define_insn "*pushv2si"
+ [(set (match_operand:V2SI 0 "push_operand" "=<")
+ (match_operand:V2SI 1 "register_operand" "y"))]
+ "TARGET_MMX"
+ "#")
+
+(define_insn "*pushv4hi"
+ [(set (match_operand:V4HI 0 "push_operand" "=<")
+ (match_operand:V4HI 1 "register_operand" "y"))]
+ "TARGET_MMX"
+ "#")
+
+(define_insn "*pushv8qi"
+ [(set (match_operand:V8QI 0 "push_operand" "=<")
+ (match_operand:V8QI 1 "register_operand" "y"))]
+ "TARGET_MMX"
+ "#")
+
+(define_insn "*pushv2sf"
+ [(set (match_operand:V2SF 0 "push_operand" "=<")
+ (match_operand:V2SF 1 "register_operand" "y"))]
+ "TARGET_3DNOW"
+ "#")
+
+(define_split
+ [(set (match_operand 0 "push_operand" "")
+ (match_operand 1 "register_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 3)))
+ (set (match_dup 2) (match_dup 1))]
+ "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
+ stack_pointer_rtx);
+ operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
+
+(define_split
+ [(set (match_operand 0 "push_operand" "")
+ (match_operand 1 "register_operand" ""))]
+ "TARGET_64BIT && reload_completed
+ && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
+ [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 3)))
+ (set (match_dup 2) (match_dup 1))]
+ "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
+ stack_pointer_rtx);
+ operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
+
+
(define_insn_and_split "*pushti"
[(set (match_operand:TI 0 "push_operand" "=<")
(match_operand:TI 1 "nonmemory_operand" "x"))]
@@ -18158,8 +18270,9 @@
(define_insn "movti_internal"
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
- (match_operand:TI 1 "general_operand" "C,xm,x"))]
- "TARGET_SSE && !TARGET_64BIT"
+ (match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE && !TARGET_64BIT
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
xorps\t%0, %0
movaps\t{%1, %0|%0, %1}
@@ -18169,7 +18282,7 @@
(define_insn "*movti_rex64"
[(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
- (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
+ (match_operand:TI 1 "general_operand" "riFo,riF,C,x,m"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
@@ -18191,29 +18304,56 @@
;; These two patterns are useful for specifying exactly whether to use
;; movaps or movups
-(define_insn "sse_movaps"
+(define_expand "sse_movaps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
+ UNSPEC_MOVA))]
+ "TARGET_SSE"
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ {
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse_movaps (tmp, operands[1]));
+ emit_move_insn (operands[0], tmp);
+ DONE;
+ }
+})
+
+(define_insn "*sse_movaps_1"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVA))]
- "TARGET_SSE"
- "@
- movaps\t{%1, %0|%0, %1}
- movaps\t{%1, %0|%0, %1}"
+ "TARGET_SSE
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov,ssemov")
(set_attr "mode" "V4SF")])
-(define_insn "sse_movups"
+(define_expand "sse_movups"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
+ UNSPEC_MOVU))]
+ "TARGET_SSE"
+{
+ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+ {
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse_movups (tmp, operands[1]));
+ emit_move_insn (operands[0], tmp);
+ DONE;
+ }
+})
+
+(define_insn "*sse_movups_1"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
- "TARGET_SSE"
- "@
- movups\t{%1, %0|%0, %1}
- movups\t{%1, %0|%0, %1}"
+ "TARGET_SSE
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movups\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt,ssecvt")
(set_attr "mode" "V4SF")])
-
;; SSE Strange Moves.
(define_insn "sse_movmskps"
@@ -18329,11 +18469,21 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
-(define_insn "sse_loadss"
+(define_expand "sse_loadss"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:SF 1 "memory_operand" "")]
+ "TARGET_SSE"
+{
+ emit_insn (gen_sse_loadss_1 (operands[0], operands[1],
+ CONST0_RTX (V4SFmode)));
+ DONE;
+})
+
+(define_insn "sse_loadss_1"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (match_operand:V4SF 1 "memory_operand" "m")
- (vec_duplicate:V4SF (float:SF (const_int 0)))
+ (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m"))
+ (match_operand:V4SF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE"
"movss\t{%1, %0|%0, %1}"
@@ -18804,7 +18954,7 @@
(define_insn "sse2_nandv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0"))
+ (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0"))
(match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2
&& (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
@@ -18908,7 +19058,7 @@
(match_operator:V4SI 3 "sse_comparison_operator"
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "register_operand" "x")])
- (match_dup 1)
+ (subreg:V4SI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE"
"cmp%D3ss\t{%2, %0|%0, %2}"
@@ -19093,6 +19243,19 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SF")])
+(define_insn "cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (const_int 14)))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,vector")
+ (set_attr "mode" "SF")])
+
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI
@@ -19103,6 +19266,17 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SF")])
+(define_insn "cvtss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (vec_select:DI
+ (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m"))
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "cvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,vector")
+ (set_attr "mode" "SF")])
+
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI
@@ -19114,6 +19288,18 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SF")])
+(define_insn "cvttss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (vec_select:DI
+ (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")]
+ UNSPEC_FIX)
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvttss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SF")
+ (set_attr "athlon_decode" "vector,vector")])
+
;; MMX insns
@@ -19121,7 +19307,7 @@
(define_insn "addv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
- (plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+ (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddb\t{%2, %0|%0, %2}"
@@ -19130,7 +19316,7 @@
(define_insn "addv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
- (plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+ (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddw\t{%2, %0|%0, %2}"
@@ -19139,16 +19325,27 @@
(define_insn "addv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+ (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0")
(match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddd\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
+(define_insn "mmx_adddi3"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI
+ [(plus:DI (match_operand:DI 1 "register_operand" "%0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+ UNSPEC_NOP))]
+ "TARGET_MMX"
+ "paddq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
(define_insn "ssaddv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
- (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+ (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddsb\t{%2, %0|%0, %2}"
@@ -19157,7 +19354,7 @@
(define_insn "ssaddv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
- (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+ (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddsw\t{%2, %0|%0, %2}"
@@ -19166,7 +19363,7 @@
(define_insn "usaddv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
- (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+ (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddusb\t{%2, %0|%0, %2}"
@@ -19175,7 +19372,7 @@
(define_insn "usaddv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
- (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+ (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddusw\t{%2, %0|%0, %2}"
@@ -19209,6 +19406,17 @@
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
+(define_insn "mmx_subdi3"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI
+ [(minus:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+ UNSPEC_NOP))]
+ "TARGET_MMX"
+ "psubq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
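The new mmx_adddi3/mmx_subdi3 patterns wrap the DImode arithmetic in UNSPEC_NOP so the single-lane MMX paddq/psubq are not confused with an ordinary DImode integer add/subtract. A hedged usage sketch, assuming these patterns back the SSE2 MMX-register intrinsics:

#include <emmintrin.h>

__m64
add_sub_64 (__m64 a, __m64 b)
{
  __m64 s = _mm_add_si64 (a, b);   /* paddq on MMX registers */
  return _mm_sub_si64 (s, b);      /* psubq on MMX registers */
}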
(define_insn "sssubv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
(ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
@@ -19312,7 +19520,7 @@
(define_insn "mmx_iordi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
- [(ior:DI (match_operand:DI 1 "register_operand" "0")
+ [(ior:DI (match_operand:DI 1 "register_operand" "%0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
@@ -19323,7 +19531,7 @@
(define_insn "mmx_xordi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
- [(xor:DI (match_operand:DI 1 "register_operand" "0")
+ [(xor:DI (match_operand:DI 1 "register_operand" "%0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
@@ -19346,7 +19554,7 @@
(define_insn "mmx_anddi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
- [(and:DI (match_operand:DI 1 "register_operand" "0")
+ [(and:DI (match_operand:DI 1 "register_operand" "%0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
@@ -19805,17 +20013,17 @@
(define_insn "ldmxcsr"
[(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
UNSPECV_LDMXCSR)]
- "TARGET_MMX"
+ "TARGET_SSE"
"ldmxcsr\t%0"
- [(set_attr "type" "mmx")
+ [(set_attr "type" "sse")
(set_attr "memory" "load")])
(define_insn "stmxcsr"
[(set (match_operand:SI 0 "memory_operand" "=m")
(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
- "TARGET_MMX"
+ "TARGET_SSE"
"stmxcsr\t%0"
- [(set_attr "type" "mmx")
+ [(set_attr "type" "sse")
(set_attr "memory" "store")])
(define_expand "sfence"
@@ -20471,7 +20679,7 @@
(match_operator:V2DI 3 "sse_comparison_operator"
[(match_operand:V2DF 1 "register_operand" "0")
(match_operand:V2DF 2 "nonimmediate_operand" "x")])
- (match_dup 1)
+ (subreg:V2DI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE2"
"cmp%D3sd\t{%2, %0|%0, %2}"
@@ -20692,6 +20900,15 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SI")])
+(define_insn "cvtsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "SI")])
+
(define_insn "cvttsd2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
@@ -20701,6 +20918,16 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "SI")])
+(define_insn "cvttsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
+ (parallel [(const_int 0)]))] UNSPEC_FIX))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvttsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector,vector")])
+
(define_insn "cvtsi2sd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
@@ -20713,6 +20940,19 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
+(define_insn "cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
+ (vec_duplicate:V2DF
+ (float:DF
+ (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (const_int 2)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsi2sdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "vector,direct")])
+
;; Conversions between SF and DF
(define_insn "cvtsd2ss"
@@ -20770,7 +21010,7 @@
(define_insn "addv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
- (plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddb\t{%2, %0|%0, %2}"
@@ -20779,7 +21019,7 @@
(define_insn "addv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
- (plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddw\t{%2, %0|%0, %2}"
@@ -20788,7 +21028,7 @@
(define_insn "addv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
- (plus:V4SI (match_operand:V4SI 1 "register_operand" "0")
+ (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0")
(match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddd\t{%2, %0|%0, %2}"
@@ -20797,7 +21037,7 @@
(define_insn "addv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
- (plus:V2DI (match_operand:V2DI 1 "register_operand" "0")
+ (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0")
(match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddq\t{%2, %0|%0, %2}"
@@ -20806,7 +21046,7 @@
(define_insn "ssaddv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
- (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddsb\t{%2, %0|%0, %2}"
@@ -20815,7 +21055,7 @@
(define_insn "ssaddv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
- (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddsw\t{%2, %0|%0, %2}"
@@ -20824,7 +21064,7 @@
(define_insn "usaddv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
- (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+ (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddusb\t{%2, %0|%0, %2}"
@@ -20833,7 +21073,7 @@
(define_insn "usaddv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
- (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+ (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddusw\t{%2, %0|%0, %2}"
@@ -21069,7 +21309,8 @@
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0")
(vec_duplicate:V8HI
- (match_operand:SI 2 "nonimmediate_operand" "rm"))
+ (truncate:HI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
(match_operand:SI 3 "immediate_operand" "i")))]
"TARGET_SSE2"
"pinsrw\t{%3, %2, %0|%0, %2, %3}"
@@ -21218,7 +21459,7 @@
(define_insn "ashrv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psraw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21227,7 +21468,7 @@
(define_insn "ashrv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psrad\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21236,7 +21477,7 @@
(define_insn "lshrv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psrlw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21245,7 +21486,7 @@
(define_insn "lshrv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psrld\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21254,7 +21495,7 @@
(define_insn "lshrv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psrlq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21263,7 +21504,7 @@
(define_insn "ashlv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psllw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21272,7 +21513,7 @@
(define_insn "ashlv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"pslld\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21281,7 +21522,7 @@
(define_insn "ashlv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xi")))]
"TARGET_SSE2"
"psllq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
@@ -21595,45 +21836,41 @@
(define_insn "sse2_movapd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")]
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVA))]
- "TARGET_SSE2"
- "@
- movapd\t{%1, %0|%0, %1}
- movapd\t{%1, %0|%0, %1}"
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movapd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V2DF")])
(define_insn "sse2_movupd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
- (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")]
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
- "TARGET_SSE2"
- "@
- movupd\t{%1, %0|%0, %1}
- movupd\t{%1, %0|%0, %1}"
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movupd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
(define_insn "sse2_movdqa"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
- (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")]
+ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVA))]
- "TARGET_SSE2"
- "@
- movdqa\t{%1, %0|%0, %1}
- movdqa\t{%1, %0|%0, %1}"
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movdqa\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "TI")])
(define_insn "sse2_movdqu"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
- (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")]
+ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
- "TARGET_SSE2"
- "@
- movdqu\t{%1, %0|%0, %1}
- movdqu\t{%1, %0|%0, %1}"
+ "TARGET_SSE2
+ && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+ "movdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "TI")])
@@ -21641,24 +21878,48 @@
[(set (match_operand:DI 0 "nonimmediate_operand" "=m,y")
(vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x")
(parallel [(const_int 0)])))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !TARGET_64BIT"
"@
movq\t{%1, %0|%0, %1}
movdq2q\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "TI")])
+(define_insn "sse2_movdq2q_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r")
+ (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_movq2dq"
[(set (match_operand:V2DI 0 "register_operand" "=x,?x")
(vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y")
(const_int 0)))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && !TARGET_64BIT"
"@
movq\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt,ssemov")
(set_attr "mode" "TI")])
+(define_insn "sse2_movq2dq_rex64"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x")
+ (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r")
+ (const_int 0)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt,ssemov,ssecvt")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_movq"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_concat:V2DI (vec_select:DI
@@ -21673,7 +21934,7 @@
(define_insn "sse2_loadd"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_merge:V4SI
- (vec_duplicate:V4HI (match_operand:SI 1 "nonimmediate_operand" "mr"))
+ (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr"))
(const_vector:V4SI [(const_int 0)
(const_int 0)
(const_int 0)
@@ -21716,11 +21977,21 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
-(define_insn "sse2_loadsd"
+(define_expand "sse2_loadsd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" "")]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1],
+ CONST0_RTX (V2DFmode)));
+ DONE;
+})
+
+(define_insn "sse2_loadsd_1"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
- (match_operand:DF 1 "memory_operand" "m")
- (vec_duplicate:DF (float:DF (const_int 0)))
+ (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m"))
+ (match_operand:V2DF 2 "const0_operand" "X")
(const_int 1)))]
"TARGET_SSE2"
"movsd\t{%1, %0|%0, %1}"