aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/i386.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r--gcc/config/i386/i386.md1915
1 file changed, 1892 insertions(+), 23 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 72fd72088c0..9bcd4e4219c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -71,7 +71,25 @@
;; 9 This is an `fnstsw' operation.
;; 10 This is a `sahf' operation.
;; 11 This is a `fstcw' operation
-;;
+
+;; For SSE/MMX support:
+;; 30 This is `fix', guaranteed to be truncating.
+;; 31 This is an `emms' operation.
+;; 32 This is a `maskmov' operation.
+;; 33 This is a `movmsk' operation.
+;; 34 This is a `non-temporal' move.
+;; 35 This is a `prefetch' operation.
+;; 36 This is used to distinguish COMISS from UCOMISS.
+;; 37 This is a `ldmxcsr' operation.
+;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
+;; 39 This is a forced `movups' instruction (rather than whatever movti does)
+;; 40 This is a `stmxcsr' operation.
+;; 41 This is a `shuffle' operation.
+;; 42 This is a `rcp' operation.
+;; 43 This is a `rsqrt' operation.
+;; 44 This is a `sfence' operation.
+;; 45 This is a noop to prevent excessive combiner cleverness.
+
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
;; from i386.c.
@@ -84,7 +102,7 @@
;; A basic instruction type. Refinements due to arguments to be
;; provided in other attributes.
(define_attr "type"
- "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld"
+ "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld,sse,mmx"
(const_string "other"))
;; Main data type used by the insn
@@ -234,7 +252,7 @@
(const_string "store")
(match_operand 1 "memory_operand" "")
(const_string "load")
- (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp")
+ (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp,sse,mmx")
(match_operand 2 "memory_operand" ""))
(const_string "load")
(and (eq_attr "type" "icmov")
@@ -1530,15 +1548,19 @@
(set_attr "length_immediate" "1")])
(define_insn "*movsi_1"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=*a,r,*a,m")
- (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=*a,r,*a,m,!*y,!r")
+ (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,r,*y"))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
"*
{
switch (get_attr_type (insn))
{
+ case TYPE_MMX:
+ return \"movd\\t{%1, %0|%0, %1}\";
+
case TYPE_LEA:
return \"lea{l}\\t{%1, %0|%0, %1}\";
+
default:
if (flag_pic && SYMBOLIC_CONST (operands[1]))
abort();
@@ -1546,12 +1568,15 @@
}
}"
[(set (attr "type")
- (cond [(and (ne (symbol_ref "flag_pic") (const_int 0))
+ (cond [(ior (match_operand:SI 0 "mmx_reg_operand" "")
+ (match_operand:SI 1 "mmx_reg_operand" ""))
+ (const_string "mmx")
+ (and (ne (symbol_ref "flag_pic") (const_int 0))
(match_operand:SI 1 "symbolic_operand" ""))
(const_string "lea")
]
(const_string "imov")))
- (set_attr "modrm" "0,*,0,*")
+ (set_attr "modrm" "0,*,0,*,*,*")
(set_attr "mode" "SI")])
(define_insn "*swapsi"
@@ -1692,7 +1717,7 @@
(set_attr "mode" "HI")])
(define_insn "*movstricthi_xor"
- [(set (strict_low_part (match_operand:HI 0 "register_operand" "=r"))
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r"))
(match_operand:HI 1 "const0_operand" "i"))
(clobber (reg:CC 17))]
"reload_completed && (!TARGET_USE_MOV0 || optimize_size)"
@@ -1983,15 +2008,20 @@
"#")
(define_insn "*movdi_2"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
- (match_operand:DI 1 "general_operand" "riFo,riF"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y")
+ (match_operand:DI 1 "general_operand" "riFo,riF,*y,m"))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
- "#")
+ "@
+ #
+ #
+ movq\\t{%1, %0|%0, %1}
+ movq\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "*,*,mmx,mmx")])
(define_split
[(set (match_operand:DI 0 "push_operand" "")
(match_operand:DI 1 "general_operand" ""))]
- "reload_completed"
+ "reload_completed && ! MMX_REG_P (operands[1])"
[(const_int 0)]
"if (!ix86_split_long_move (operands)) abort (); DONE;")
@@ -1999,7 +2029,7 @@
(define_split
[(set (match_operand:DI 0 "nonimmediate_operand" "")
(match_operand:DI 1 "general_operand" ""))]
- "reload_completed"
+ "reload_completed && ! MMX_REG_P (operands[0]) && ! MMX_REG_P (operands[1])"
[(set (match_dup 2) (match_dup 5))
(set (match_dup 3) (match_dup 6))]
"if (ix86_split_long_move (operands)) DONE;")
@@ -3967,10 +3997,55 @@
(set_attr "mode" "SI")])
(define_insn "*addsi_3"
+ [(set (reg 17)
+ (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)
+ /* Current assemblers are broken and do not allow @GOTOFF in
+ anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+ "*
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (! rtx_equal_p (operands[0], operands[1]))
+ abort ();
+ if (operands[2] == const1_rtx)
+ return \"inc{l}\\t%0\";
+ else if (operands[2] == constm1_rtx)
+ return \"dec{l}\\t%0\";
+ else
+ abort();
+
+ default:
+ if (! rtx_equal_p (operands[0], operands[1]))
+ abort ();
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return \"sub{l}\\t{%2, %0|%0, %2}\";
+ }
+ return \"add{l}\\t{%2, %0|%0, %2}\";
+ }
+}"
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi_4"
[(set (reg:CC 17)
- (compare:CC (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
- (match_operand:SI 2 "general_operand" "rmni,rni"))
- (const_int 0)))
+ (compare:CC (neg:SI (match_operand:SI 2 "general_operand" "rmni,rni"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0,0")))
(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
(plus:SI (match_dup 1) (match_dup 2)))]
"ix86_binary_operator_ok (PLUS, SImode, operands)
@@ -3981,6 +4056,19 @@
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
+(define_insn "*addsi_5"
+ [(set (reg:CC 17)
+ (compare:CC (neg:SI (match_operand:SI 2 "general_operand" "rmni"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "(GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)
+ /* Current assemblers are broken and do not allow @GOTOFF in
+ anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+ "add{l}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
(define_expand "addhi3"
[(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
(plus:HI (match_operand:HI 1 "nonimmediate_operand" "")
@@ -4121,10 +4209,49 @@
(set_attr "mode" "HI")])
(define_insn "*addhi_3"
+ [(set (reg 17)
+ (compare (neg:HI (match_operand:HI 2 "general_operand" "rmni"))
+ (match_operand:HI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "*
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return \"inc{w}\\t%0\";
+ else if (operands[2] == constm1_rtx
+ || (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) == 65535))
+ return \"dec{w}\\t%0\";
+ abort();
+
+ default:
+ /* Make things pretty and `subw $4,%ax' rather than `addw $-4, %ax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return \"sub{w}\\t{%2, %0|%0, %2}\";
+ }
+ return \"add{w}\\t{%2, %0|%0, %2}\";
+ }
+}"
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "HI")])
+
+(define_insn "*addhi_4"
[(set (reg:CC 17)
- (compare:CC (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
- (match_operand:HI 2 "general_operand" "rmni,rni"))
- (const_int 0)))
+ (compare:CC (neg:HI (match_operand:HI 2 "general_operand" "rmni,rni"))
+ (match_operand:HI 1 "nonimmediate_operand" "%0,0")))
(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
(plus:HI (match_dup 1) (match_dup 2)))]
"ix86_binary_operator_ok (PLUS, HImode, operands)"
@@ -4132,6 +4259,16 @@
[(set_attr "type" "alu")
(set_attr "mode" "HI")])
+(define_insn "*addhi_5"
+ [(set (reg:CC 17)
+ (compare:CC (neg:HI (match_operand:HI 2 "general_operand" "rmni"))
+ (match_operand:HI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "(GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "add{w}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
(define_expand "addqi3"
[(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "")
(plus:QI (match_operand:QI 1 "nonimmediate_operand" "")
@@ -4280,10 +4417,46 @@
(set_attr "mode" "QI")])
(define_insn "*addqi_3"
+ [(set (reg 17)
+ (compare (neg:QI (match_operand:QI 2 "general_operand" "qmni"))
+ (match_operand:QI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:QI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "*
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return \"inc{b}\\t%0\";
+ else if (operands[2] == constm1_rtx
+ || (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) == 255))
+ return \"dec{b}\\t%0\";
+ abort();
+
+ default:
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */
+ if (GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return \"sub{b}\\t{%2, %0|%0, %2}\";
+ }
+ return \"add{b}\\t{%2, %0|%0, %2}\";
+ }
+}"
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*addqi_4"
[(set (reg:CC 17)
- (compare:CC (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
- (match_operand:QI 2 "general_operand" "qmni,qni"))
- (const_int 0)))
+ (compare:CC (neg:QI (match_operand:QI 2 "general_operand" "qmni,qni"))
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0")))
(set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
(plus:QI (match_dup 1) (match_dup 2)))]
"ix86_binary_operator_ok (PLUS, QImode, operands)"
@@ -4291,6 +4464,16 @@
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
+(define_insn "*addqi_5"
+ [(set (reg:CC 17)
+ (compare:CC (neg:QI (match_operand:QI 2 "general_operand" "qmni"))
+ (match_operand:QI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:QI 0 "=r"))]
+ "(GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "add{b}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
(define_insn "addqi_ext_1"
[(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q")
@@ -4448,6 +4631,18 @@
(const_int 0)))
(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
(minus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_3"
+ [(set (reg 17)
+ (compare (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm")))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCmode)
&& ix86_binary_operator_ok (MINUS, SImode, operands)"
"sub{l}\\t{%2, %0|%0, %2}"
@@ -4480,6 +4675,18 @@
(const_int 0)))
(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
(minus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (MINUS, HImode, operands)"
+ "sub{w}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*subhi_3"
+ [(set (reg 17)
+ (compare (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "ri,rm")))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (minus:HI (match_dup 1) (match_dup 2)))]
"ix86_match_ccmode (insn, CCmode)
&& ix86_binary_operator_ok (MINUS, HImode, operands)"
"sub{w}\\t{%2, %0|%0, %2}"
@@ -4512,6 +4719,18 @@
(const_int 0)))
(set (match_operand:HI 0 "nonimmediate_operand" "=qm,q")
(minus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (MINUS, QImode, operands)"
+ "sub{b}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*subqi_3"
+ [(set (reg 17)
+ (compare (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "general_operand" "qi,qm")))
+ ;; Fixed: destination and minus must be QImode, not HImode, to match
+ ;; the QImode operands and the sub{b} output template below.
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
"ix86_match_ccmode (insn, CCmode)
&& ix86_binary_operator_ok (MINUS, QImode, operands)"
"sub{b}\\t{%2, %0|%0, %2}"
@@ -5461,6 +5680,18 @@
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
+(define_insn "*iorsi_3"
+ [(set (reg 17)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "or{l}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
(define_expand "iorhi3"
[(set (match_operand:HI 0 "nonimmediate_operand" "")
(ior:HI (match_operand:HI 1 "nonimmediate_operand" "")
@@ -5492,6 +5723,18 @@
[(set_attr "type" "alu")
(set_attr "mode" "HI")])
+(define_insn "*iorhi_3"
+ [(set (reg 17)
+ (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+ (match_operand:HI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "or{w}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
(define_expand "iorqi3"
[(set (match_operand:QI 0 "nonimmediate_operand" "")
(ior:QI (match_operand:QI 1 "nonimmediate_operand" "")
@@ -5526,6 +5769,19 @@
"or{b}\\t{%2, %0|%0, %2}"
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
+
+(define_insn "*iorqi_3"
+ [(set (reg 17)
+ (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "general_operand" "qim"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "or{b}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
;; Logical XOR instructions
@@ -5563,6 +5819,18 @@
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
+(define_insn "*xorsi_3"
+ [(set (reg 17)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xor{l}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
(define_expand "xorhi3"
[(set (match_operand:HI 0 "nonimmediate_operand" "")
(xor:HI (match_operand:HI 1 "nonimmediate_operand" "")
@@ -5594,6 +5862,18 @@
[(set_attr "type" "alu")
(set_attr "mode" "HI")])
+(define_insn "*xorhi_3"
+ [(set (reg 17)
+ (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+ (match_operand:HI 2 "general_operand" "rim"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xor{w}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
(define_expand "xorqi3"
[(set (match_operand:QI 0 "nonimmediate_operand" "")
(xor:QI (match_operand:QI 1 "nonimmediate_operand" "")
@@ -5648,6 +5928,19 @@
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
+(define_insn "*xorqi_cc_2"
+ [(set (reg 17)
+ (compare
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "general_operand" "qim"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
+ "xor{b}\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
(define_insn "xorqi_cc_ext_1"
[(set (reg:CCNO 17)
(compare:CCNO
@@ -7864,7 +8157,7 @@
[(set_attr "type" "setcc")
(set_attr "mode" "QI")])
-(define_insn "*setcc_4"
+(define_insn "setcc_4"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
(match_operator:QI 1 "uno_comparison_operator"
[(reg:CC 17) (const_int 0)]))]
@@ -11170,3 +11463,1579 @@
CODE_LABEL_NUMBER (operands[2]));
RET;
}")
+
+;; Pentium III SIMD instructions.
+
+;; Moves for SSE/MMX regs.
+
+(define_insn "movv4sf_internal"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+ (match_operand:V4SF 1 "general_operand" "xm,x"))]
+ "TARGET_SSE"
+ ;; @@@ let's try to use movaps here.
+ "movaps\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")])
+
+(define_insn "movv4si_internal"
+ [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
+ (match_operand:V4SI 1 "general_operand" "xm,x"))]
+ "TARGET_SSE"
+ ;; @@@ let's try to use movaps here.
+ "movaps\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")])
+
+(define_insn "movv8qi_internal"
+ [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
+ (match_operand:V8QI 1 "general_operand" "ym,y"))]
+ "TARGET_MMX"
+ "movq\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "movv4hi_internal"
+ [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
+ (match_operand:V4HI 1 "general_operand" "ym,y"))]
+ "TARGET_MMX"
+ "movq\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+(define_insn "movv2si_internal"
+ [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
+ (match_operand:V2SI 1 "general_operand" "ym,y"))]
+ "TARGET_MMX"
+ "movq\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")])
+
+(define_expand "movti"
+ [(set (match_operand:TI 0 "general_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ "TARGET_SSE"
+ "
+{
+ /* Force constants other than zero into memory. We do not know how the
+ instructions used to build constants modify the upper 64 bits
+ of the register, once we have that information we may be able
+ to handle some of them more efficiently. */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (operands[0], TImode)
+ && CONSTANT_P (operands[1]))
+ {
+ rtx addr = gen_reg_rtx (Pmode);
+
+ emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0));
+ operands[1] = gen_rtx_MEM (TImode, addr);
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], TImode)
+ && !register_operand (operands[1], TImode)
+ && operands[1] != CONST0_RTX (TImode))
+ {
+ rtx temp = force_reg (TImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+}")
+
+(define_expand "movv4sf"
+ [(set (match_operand:V4SF 0 "general_operand" "")
+ (match_operand:V4SF 1 "general_operand" ""))]
+ "TARGET_SSE"
+ "
+{
+ /* Force constants other than zero into memory. We do not know how the
+ instructions used to build constants modify the upper 64 bits
+ of the register, once we have that information we may be able
+ to handle some of them more efficiently. */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (operands[0], V4SFmode)
+ && CONSTANT_P (operands[1]))
+ {
+ rtx addr = gen_reg_rtx (Pmode);
+
+ emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0));
+ operands[1] = gen_rtx_MEM (V4SFmode, addr);
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], V4SFmode)
+ && !register_operand (operands[1], V4SFmode)
+ && operands[1] != CONST0_RTX (V4SFmode))
+ {
+ rtx temp = force_reg (V4SFmode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+}")
+
+(define_expand "movv4si"
+ [(set (match_operand:V4SI 0 "general_operand" "")
+ (match_operand:V4SI 1 "general_operand" ""))]
+ "TARGET_SSE"
+ "
+{
+ /* Force constants other than zero into memory. We do not know how the
+ instructions used to build constants modify the upper 64 bits
+ of the register, once we have that information we may be able
+ to handle some of them more efficiently. */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (operands[0], V4SImode)
+ && CONSTANT_P (operands[1]))
+ {
+ rtx addr = gen_reg_rtx (Pmode);
+
+ emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0));
+ operands[1] = gen_rtx_MEM (V4SImode, addr);
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], V4SImode)
+ && !register_operand (operands[1], V4SImode)
+ && operands[1] != CONST0_RTX (V4SImode))
+ {
+ rtx temp = force_reg (V4SImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+}")
+
+(define_expand "movv2si"
+ [(set (match_operand:V2SI 0 "general_operand" "")
+ (match_operand:V2SI 1 "general_operand" ""))]
+ "TARGET_MMX"
+ "
+{
+ /* Force constants other than zero into memory. We do not know how the
+ instructions used to build constants modify the upper 64 bits
+ of the register, once we have that information we may be able
+ to handle some of them more efficiently. */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (operands[0], V2SImode)
+ && CONSTANT_P (operands[1]))
+ {
+ rtx addr = gen_reg_rtx (Pmode);
+
+ emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0));
+ operands[1] = gen_rtx_MEM (V2SImode, addr);
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], V2SImode)
+ && !register_operand (operands[1], V2SImode)
+ && operands[1] != CONST0_RTX (V2SImode))
+ {
+ rtx temp = force_reg (V2SImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+}")
+
+(define_expand "movv4hi"
+ [(set (match_operand:V4HI 0 "general_operand" "")
+ (match_operand:V4HI 1 "general_operand" ""))]
+ "TARGET_MMX"
+ "
+{
+ /* Force constants other than zero into memory. We do not know how the
+ instructions used to build constants modify the upper 64 bits
+ of the register, once we have that information we may be able
+ to handle some of them more efficiently. */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (operands[0], V4HImode)
+ && CONSTANT_P (operands[1]))
+ {
+ rtx addr = gen_reg_rtx (Pmode);
+
+ emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0));
+ operands[1] = gen_rtx_MEM (V4HImode, addr);
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], V4HImode)
+ && !register_operand (operands[1], V4HImode)
+ && operands[1] != CONST0_RTX (V4HImode))
+ {
+ rtx temp = force_reg (V4HImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+}")
+
+(define_expand "movv8qi"
+ [(set (match_operand:V8QI 0 "general_operand" "")
+ (match_operand:V8QI 1 "general_operand" ""))]
+ "TARGET_MMX"
+ "
+{
+ /* Force constants other than zero into memory. We do not know how the
+ instructions used to build constants modify the upper 64 bits
+ of the register, once we have that information we may be able
+ to handle some of them more efficiently. */
+ if ((reload_in_progress | reload_completed) == 0
+ && register_operand (operands[0], V8QImode)
+ && CONSTANT_P (operands[1]))
+ {
+ rtx addr = gen_reg_rtx (Pmode);
+
+ emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0));
+ operands[1] = gen_rtx_MEM (V8QImode, addr);
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if ((reload_in_progress | reload_completed) == 0
+ && !register_operand (operands[0], V8QImode)
+ && !register_operand (operands[1], V8QImode)
+ && operands[1] != CONST0_RTX (V8QImode))
+ {
+ rtx temp = force_reg (V8QImode, operands[1]);
+ emit_move_insn (operands[0], temp);
+ DONE;
+ }
+}")
+
+(define_insn_and_split "*pushti"
+ [(set (match_operand:TI 0 "push_operand" "=<")
+ (match_operand:TI 1 "nonmemory_operand" "x"))]
+ "TARGET_SSE"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+ (set (mem:TI (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "sse")])
+
+(define_insn_and_split "*pushv4sf"
+ [(set (match_operand:V4SF 0 "push_operand" "=<")
+ (match_operand:V4SF 1 "nonmemory_operand" "x"))]
+ "TARGET_SSE"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+ (set (mem:V4SF (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "sse")])
+
+(define_insn_and_split "*pushv4si"
+ [(set (match_operand:V4SI 0 "push_operand" "=<")
+ (match_operand:V4SI 1 "nonmemory_operand" "x"))]
+ "TARGET_SSE"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16)))
+ (set (mem:V4SI (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "sse")])
+
+(define_insn_and_split "*pushv2si"
+ [(set (match_operand:V2SI 0 "push_operand" "=<")
+ (match_operand:V2SI 1 "nonmemory_operand" "y"))]
+ "TARGET_MMX"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+ (set (mem:V2SI (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "mmx")])
+
+(define_insn_and_split "*pushv4hi"
+ [(set (match_operand:V4HI 0 "push_operand" "=<")
+ (match_operand:V4HI 1 "nonmemory_operand" "y"))]
+ "TARGET_MMX"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+ (set (mem:V4HI (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "mmx")])
+
+(define_insn_and_split "*pushv8qi"
+ [(set (match_operand:V8QI 0 "push_operand" "=<")
+ (match_operand:V8QI 1 "nonmemory_operand" "y"))]
+ "TARGET_MMX"
+ "#"
+ ""
+ [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
+ (set (mem:V8QI (reg:SI 7)) (match_dup 1))]
+ ""
+ [(set_attr "type" "mmx")])
+
+(define_insn "movti_internal"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
+ (match_operand:TI 1 "general_operand" "xm,x"))]
+ "TARGET_SSE"
+ "@
+ movaps\\t{%1, %0|%0, %1}
+ movaps\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")])
+
+;; These two patterns are useful for specifying exactly whether to use
+;; movaps or movups
+(define_insn "sse_movaps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+ (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))]
+ "TARGET_SSE"
+ "@
+ movaps\\t{%1, %0|%0, %1}
+ movaps\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")])
+
+(define_insn "sse_movups"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+ (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))]
+ "TARGET_SSE"
+ "@
+ movups\\t{%1, %0|%0, %1}
+ movups\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")])
+
+
+;; SSE Strange Moves.
+
+(define_insn "sse_movmskps"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] 33))]
+ "TARGET_SSE"
+ "movmskps\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")])
+
+(define_insn "mmx_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))]
+ "TARGET_SSE"
+ "pmovmskb\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")])
+
+(define_insn "mmx_maskmovq"
+ [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")] 32))]
+ "TARGET_SSE"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovq\\t{%2, %1|%1, %2}"
+ [(set_attr "type" "sse")])
+
+(define_insn "sse_movntv4sf"
+ [(set (match_operand:V4SF 0 "memory_operand" "=m")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] 34))]
+ "TARGET_SSE"
+ "movntps\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")])
+
+(define_insn "sse_movntdi"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ ;; movntq stores from an MMX register, so the constraint must be
+ ;; "y" (MMX regs), not "x" (SSE regs).
+ (unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))]
+ "TARGET_SSE"
+
+(define_insn "sse_movhlps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
+ (parallel [(const_int 2)
+ (const_int 3)
+ (const_int 0)
+ (const_int 1)]))
+ (const_int 3)))]
+ "TARGET_SSE"
+ "movhlps\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")])
+
+(define_insn "sse_movlhps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
+ (parallel [(const_int 2)
+ (const_int 3)
+ (const_int 0)
+ (const_int 1)]))
+ (const_int 12)))]
+ "TARGET_SSE"
+ "movlhps\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")])
+
+(define_insn "sse_movhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
+ (const_int 12)))]
+ "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+ "movhps\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")])
+
+(define_insn "sse_movlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
+ (const_int 3)))]
+ "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+ "movlps\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")])
+
+(define_insn "sse_loadss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "memory_operand" "m")
+ (vec_duplicate:V4SF (float:SF (const_int 0)))
+ (const_int 1)))]
+ "TARGET_SSE"
+ "movss\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")])
+
+(define_insn "sse_movss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "movss\\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")])
+
+(define_insn "sse_storess"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "movss\\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")])
+
+(define_insn "sse_shufps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "immediate_operand" "i")] 41))]
+ "TARGET_SSE"
+ ;; @@@ check operand order for intel/nonintel syntax
+ "shufps\\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse")])
+
+
+;; SSE arithmetic
+
+;; Packed single-precision arithmetic.  Each packed pattern (addps
+;; etc.) has a scalar `vm' companion: the operation is computed on
+;; the whole vector in the RTL, then vec_merge with mask 1 keeps only
+;; element 0 of the result, copying elements 1-3 from operand 1 --
+;; exactly the semantics of the *ss scalar instructions.
+
+(define_insn "addv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "addps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Scalar add: only element 0 is the sum.
+(define_insn "vmaddv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+				   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+			(match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "addss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "subv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "subps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Scalar subtract: only element 0 is the difference.
+(define_insn "vmsubv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+				    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+			(match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "subss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "mulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "mulps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Scalar multiply: only element 0 is the product.
+(define_insn "vmmulv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+				   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+			(match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "mulss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "divv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "divps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Scalar divide: only element 0 is the quotient.
+(define_insn "vmdivv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+				  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+			(match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "divss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE square root/reciprocal
+
+;; Reciprocal and (reciprocal) square-root approximations, packed and
+;; scalar.  Unspec 42 = rcp, 43 = rsqrt (see table at top of file).
+;; The source operand predicates are nonimmediate_operand: the "xm"
+;; constraints advertise a memory alternative, which the original
+;; register_operand predicate could never actually accept.
+
+(define_insn "rcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
+  "TARGET_SSE"
+  "rcpps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; Scalar rcp: element 0 approximated, elements 1-3 from operand 2.
+(define_insn "vmrcpv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
+			(match_operand:V4SF 2 "register_operand" "0")
+			(const_int 1)))]
+  "TARGET_SSE"
+  "rcpss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "rsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
+  "TARGET_SSE"
+  "rsqrtps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; Scalar rsqrt: element 0 approximated, elements 1-3 from operand 2.
+(define_insn "vmrsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
+			(match_operand:V4SF 2 "register_operand" "0")
+			(const_int 1)))]
+  "TARGET_SSE"
+  "rsqrtss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "sqrtps\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; Scalar sqrt: element 0 is the root, elements 1-3 from operand 2.
+(define_insn "vmsqrtv4sf2"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+			(match_operand:V4SF 2 "register_operand" "0")
+			(const_int 1)))]
+  "TARGET_SSE"
+  "sqrtss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE logical operations.
+
+;; These are not called andti3 etc. because we really really don't want
+;; the compiler to widen DImode ands to TImode ands and then try to move
+;; into DImode subregs of SSE registers, and them together, and move out
+;; of DImode subregs again!
+
+;; TImode bitwise ops on SSE registers.  Note the sse_iorti3 template
+;; emits "orps" -- the actual SSE mnemonic; "iorps" (as previously
+;; written) is not an instruction and would be rejected by the
+;; assembler.
+
+(define_insn "sse_andti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+	(and:TI (match_operand:TI 1 "register_operand" "0")
+		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "andps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; andnps computes ~dst & src, matching (and (not op1) op2) with
+;; operand 1 tied to the destination.
+(define_insn "sse_nandti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+	(and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
+		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "andnps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_iorti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+	(ior:TI (match_operand:TI 1 "register_operand" "0")
+		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "orps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_xorti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+	(xor:TI (match_operand:TI 1 "register_operand" "0")
+		(match_operand:TI 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "xorps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Use xor, but don't show input operands so they aren't live before
+;; this insn.  Unspec 45 keeps the combiner from simplifying it away.
+(define_insn "sse_clrti"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+	(unspec:TI [(const_int 0)] 45))]
+  "TARGET_SSE"
+  "xorps\\t{%0, %0|%0, %0}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE mask-generating compares
+
+;; Packed compare producing an all-ones/all-zeros mask per element.
+;; Only EQ/LT/LE/UNORDERED have direct encodings (handled by
+;; sse_comparison_operator); the maskn* variants emit the negated
+;; cmpn*/cmpord forms for the complemented condition.
+;; NOTE(review): operand 2 uses nonimmediate_operand but only an "x"
+;; constraint, so memory never matches at reload -- presumably
+;; deliberate, but confirm.
+(define_insn "maskcmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(match_operator:V4SI 3 "sse_comparison_operator"
+	 [(match_operand:V4SF 1 "register_operand" "0")
+	  (match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpeqps\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpltps\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpleps\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpunordps\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+;; Negated packed compare: (not (cmp ...)) maps to the cmpn* forms;
+;; not-UNORDERED is ORDERED, hence cmpordps.
+(define_insn "maskncmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(not:V4SI
+	 (match_operator:V4SI 3 "sse_comparison_operator"
+	  [(match_operand:V4SF 1 "register_operand" "0")
+	   (match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpneqps\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpnltps\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpnleps\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpordps\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+;; Scalar compare: only element 0 of the mask is produced.
+;; NOTE(review): (match_dup 1) here is V4SFmode inside a V4SImode
+;; vec_merge, while vmmaskncmpv4sf3 below wraps the same operand in a
+;; subreg:V4SI -- the two patterns are inconsistent and one of them
+;; is presumably wrong; confirm which form the RTL checker accepts.
+(define_insn "vmmaskcmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(vec_merge:V4SI
+	 (match_operator:V4SI 3 "sse_comparison_operator"
+	  [(match_operand:V4SF 1 "register_operand" "0")
+	   (match_operand:V4SF 2 "nonimmediate_operand" "x")])
+	 (match_dup 1)
+	 (const_int 1)))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpeqss\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpltss\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpless\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpunordss\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+;; Negated scalar compare; see vmmaskcmpv4sf3 for the mode note.
+(define_insn "vmmaskncmpv4sf3"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(vec_merge:V4SI
+	 (not:V4SI
+	  (match_operator:V4SI 3 "sse_comparison_operator"
+	   [(match_operand:V4SF 1 "register_operand" "0")
+	    (match_operand:V4SF 2 "nonimmediate_operand" "x")]))
+	 (subreg:V4SI (match_dup 1) 0)
+	 (const_int 1)))]
+  "TARGET_SSE"
+  "*
+{
+  switch (GET_CODE (operands[3]))
+    {
+    case EQ:
+      return \"cmpneqss\\t{%2, %0|%0, %2}\";
+    case LT:
+      return \"cmpnltss\\t{%2, %0|%0, %2}\";
+    case LE:
+      return \"cmpnless\\t{%2, %0|%0, %2}\";
+    case UNORDERED:
+      return \"cmpordss\\t{%2, %0|%0, %2}\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+
+;; Scalar compares setting EFLAGS (reg 17).  Unspec table entry 36
+;; distinguishes COMISS (signals on QNaN, CCFPmode) from UCOMISS
+;; (quiet, CCFPUmode).  Operand 2 is the match_operator itself; the
+;; register operands are 0 and 1, so the output templates must
+;; reference %1 -- the previous "%2" would have printed the
+;; comparison rtx, not a register.
+
+(define_insn "sse_comi"
+  [(set (reg:CCFP 17)
+	(match_operator:CCFP 2 "sse_comparison_operator"
+	 [(vec_select:SF
+	   (match_operand:V4SF 0 "register_operand" "x")
+	   (parallel [(const_int 0)]))
+	  (vec_select:SF
+	   (match_operand:V4SF 1 "register_operand" "x")
+	   (parallel [(const_int 0)]))]))]
+  "TARGET_SSE"
+  "comiss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_ucomi"
+  [(set (reg:CCFPU 17)
+	(match_operator:CCFPU 2 "sse_comparison_operator"
+	 [(vec_select:SF
+	   (match_operand:V4SF 0 "register_operand" "x")
+	   (parallel [(const_int 0)]))
+	  (vec_select:SF
+	   (match_operand:V4SF 1 "register_operand" "x")
+	   (parallel [(const_int 0)]))]))]
+  "TARGET_SSE"
+  "ucomiss\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE unpack
+
+;; Interleave of V4SF elements.  Both merge inputs must be V4SF
+;; vec_selects of V4SF operands; the previous V8QI mode on operand 2
+;; was a copy-paste from the MMX byte-unpack patterns and produced
+;; ill-formed RTL (a V8QI value inside a V4SF vec_merge).
+
+(define_insn "sse_unpckhps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	 (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
+			  (parallel [(const_int 2)
+				     (const_int 0)
+				     (const_int 3)
+				     (const_int 1)]))
+	 (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
+			  (parallel [(const_int 0)
+				     (const_int 2)
+				     (const_int 1)
+				     (const_int 3)]))
+	 (const_int 5)))]
+  "TARGET_SSE"
+  "unpckhps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sse_unpcklps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF
+	 (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
+			  (parallel [(const_int 0)
+				     (const_int 2)
+				     (const_int 1)
+				     (const_int 3)]))
+	 (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
+			  (parallel [(const_int 2)
+				     (const_int 0)
+				     (const_int 3)
+				     (const_int 1)]))
+	 (const_int 5)))]
+  "TARGET_SSE"
+  "unpcklps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE min/max
+
+;; Packed and scalar single-precision min/max.  The vm* forms keep
+;; elements 1-3 from operand 1 (vec_merge mask 1), matching the
+;; scalar maxss/minss semantics.
+
+(define_insn "smaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "maxps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsmaxv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+				   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+			(match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "maxss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE"
+  "minps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "vmsminv4sf3"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+				   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+			(match_dup 1)
+			(const_int 1)))]
+  "TARGET_SSE"
+  "minss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; SSE <-> integer/MMX conversions
+
+;; SSE <-> integer/MMX conversions.  Unspec 30 marks a fix that is
+;; guaranteed to truncate (the cvtt* forms); plain fix: is the
+;; round-to-current-mode cvt* form.
+;; Fixes relative to the original patterns:
+;;  - cvtss2si/cvttss2si write a general integer register, so the
+;;    output constraint is "=r", not "=y" (an MMX register).
+;;  - cvtsi2ss's vec_merge mask was 15, which selects every element
+;;    from operand 1 and leaves the converted value unused; mask 14
+;;    keeps elements 1-3 from operand 1 and takes element 0 from the
+;;    conversion, consistent with the vm* patterns' convention.
+;;  - operands with an "m" in their constraint use
+;;    nonimmediate_operand so the memory alternative can match.
+
+;; Convert two packed SImode values into elements 0/1 of the
+;; destination; elements 2/3 (mask bits 2,3) come from operand 1.
+(define_insn "cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
+			(vec_duplicate:V4SF
+			 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+			(const_int 12)))]
+  "TARGET_SSE"
+  "cvtpi2ps\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Convert the low two SF elements to packed SImode in an MMX reg.
+(define_insn "cvtps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+			 (parallel
+			  [(const_int 0)
+			   (const_int 1)])))]
+  "TARGET_SSE"
+  "cvtps2pi\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; As cvtps2pi, but truncating (unspec 30).
+(define_insn "cvttps2pi"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+			 (parallel
+			  [(const_int 0)
+			   (const_int 1)])))]
+  "TARGET_SSE"
+  "cvttps2pi\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; Convert an SImode value into element 0 of the destination.
+(define_insn "cvtsi2ss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
+			(vec_duplicate:V4SF
+			 (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+			(const_int 14)))]
+  "TARGET_SSE"
+  "cvtsi2ss\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; Convert element 0 to SImode in a general register.
+(define_insn "cvtss2si"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(vec_select:SI (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+		       (parallel [(const_int 0)])))]
+  "TARGET_SSE"
+  "cvtss2si\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+;; As cvtss2si, but truncating (unspec 30).
+(define_insn "cvttss2si"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+		       (parallel [(const_int 0)])))]
+  "TARGET_SSE"
+  "cvttss2si\\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX insns
+
+;; MMX arithmetic
+
+;; MMX packed add/subtract: plain (wrapping), ss_ (signed saturating)
+;; and us_ (unsigned saturating) variants for byte and word elements;
+;; wrapping only for dword elements.
+
+(define_insn "addv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "addv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "addv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(plus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		   (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Signed saturating adds.
+(define_insn "ssaddv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddsb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ssaddv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Unsigned saturating adds.
+(define_insn "usaddv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddusb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "usaddv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "paddusw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "subv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		    (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "subv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "subv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(minus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		    (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Signed saturating subtracts.
+(define_insn "sssubv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		       (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubsb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "sssubv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		       (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Unsigned saturating subtracts.
+(define_insn "ussubv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		       (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubusb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ussubv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		       (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "psubusw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; MMX word multiplies.
+
+;; pmullw: low 16 bits of each 16x16 product.
+(define_insn "mulv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(mult:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pmullw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pmulhw: high 16 bits of each signed 16x16 product, expressed as a
+;; widening multiply, shift right 16, truncate.
+(define_insn "smulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(truncate:V4HI
+	 (lshiftrt:V4SI
+	  (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
+		     (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+	  (const_int 16))))]
+  "TARGET_MMX"
+  "pmulhw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pmulhuw: as pmulhw but unsigned.
+(define_insn "umulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(truncate:V4HI
+	 (lshiftrt:V4SI
+	  (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
+		     (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+	  (const_int 16))))]
+  "TARGET_MMX"
+  "pmulhuw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pmaddwd: multiply corresponding signed words and add adjacent
+;; 32-bit products.  The even-index products form one V2SI addend,
+;; the odd-index products the other.
+(define_insn "mmx_pmaddwd"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(plus:V2SI
+	 (mult:V2SI
+	  (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
+					     (parallel [(const_int 0)
+							(const_int 2)])))
+	  (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+					     (parallel [(const_int 0)
+							(const_int 2)]))))
+	 (mult:V2SI
+	  (sign_extend:V2SI (vec_select:V2HI (match_dup 1)
+					     (parallel [(const_int 1)
+							(const_int 3)])))
+	  (sign_extend:V2SI (vec_select:V2HI (match_dup 2)
+					     (parallel [(const_int 1)
+							(const_int 3)]))))))]
+  "TARGET_MMX"
+  "pmaddwd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX logical operations
+;; Note we don't want to declare these as regular iordi3 insns to prevent
+;; normal code that also wants to use the FPU from getting broken.
+;; The UNSPECs are there to prevent the combiner from getting overly clever.
+;; DImode bitwise ops on MMX registers.  Each operation is wrapped in
+;; unspec 45 so the combiner cannot merge these with ordinary DImode
+;; logic and end up forcing integer values through MMX registers.
+(define_insn "mmx_iordi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+	(unspec:DI
+	 [(ior:DI (match_operand:DI 1 "register_operand" "0")
+		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "por\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_xordi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+	(unspec:DI
+	 [(xor:DI (match_operand:DI 1 "register_operand" "0")
+		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "pxor\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Same as pxor, but don't show input operands so that we don't think
+;; they are live.
+(define_insn "mmx_clrdi"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+	(unspec:DI [(const_int 0)] 45))]
+  "TARGET_MMX"
+  "pxor\\t{%0, %0|%0, %0}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_anddi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+	(unspec:DI
+	 [(and:DI (match_operand:DI 1 "register_operand" "0")
+		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "pand\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; pandn computes ~dst & src, matching (and (not op1) op2) with
+;; operand 1 tied to the destination.
+(define_insn "mmx_nanddi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+	(unspec:DI
+	 [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0"))
+		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
+  "TARGET_MMX"
+  "pandn\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX unsigned averages/sum of absolute differences
+
+;; MMX-register SSE extensions: unsigned rounding averages and sum of
+;; absolute differences (available with TARGET_SSE).  The mnemonics
+;; are pavgb/pavgw/psadbw; the previous "pavgbn"/"pavgwn"/"padbw"
+;; spellings are not instructions and would be rejected by the
+;; assembler.
+
+;; pavgb: (a + b + 1) >> 1 per unsigned byte, modeled as add-plus-one
+;; then arithmetic shift.
+(define_insn "mmx_uavgv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(ashiftrt:V8QI
+	 (plus:V8QI (plus:V8QI
+		     (match_operand:V8QI 1 "register_operand" "0")
+		     (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+		    (vec_const:V8QI (parallel [(const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)])))
+	 (const_int 1)))]
+  "TARGET_SSE"
+  "pavgb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; pavgw: as pavgb for unsigned words.
+(define_insn "mmx_uavgv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(ashiftrt:V4HI
+	 (plus:V4HI (plus:V4HI
+		     (match_operand:V4HI 1 "register_operand" "0")
+		     (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+		    (vec_const:V4HI (parallel [(const_int 1)
+					       (const_int 1)
+					       (const_int 1)
+					       (const_int 1)])))
+	 (const_int 1)))]
+  "TARGET_SSE"
+  "pavgw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+;; psadbw: sum of absolute byte differences.  Operands are V8QI, not
+;; DI, to match the minus:V8QI they feed.
+;; NOTE(review): the real instruction sums the eight absolute
+;; differences into a single 16-bit result; the abs-of-difference RTL
+;; here is only an approximation, acceptable while the pattern is
+;; generated solely from the builtin.
+(define_insn "mmx_psadbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+			      (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))]
+  "TARGET_SSE"
+  "psadbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX insert/extract/shuffle
+
+;; MMX-register SSE extensions: word insert/extract and shuffle.
+;; The immediate operand belongs *inside* the {att|intel} dialect
+;; braces: AT&T order is "op imm, src, dst", Intel order is
+;; "op dst, src, imm".  The previous templates hoisted the immediate
+;; in front of the braces, which produced the wrong operand order for
+;; Intel syntax (this resolves the old @@@ review comments; the
+;; template style now matches sse_shufps).
+
+(define_insn "mmx_pinsrw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(vec_merge:V4HI (match_operand:V4HI 1 "register_operand" "0")
+			(vec_duplicate:V4HI
+			 (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm")))
+			(match_operand:SI 3 "immediate_operand" "i")))]
+  "TARGET_SSE"
+  "pinsrw\\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse")])
+
+;; pextrw zero-extends the selected word into a general register.
+(define_insn "mmx_pextrw"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
+				       (parallel
+					[(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_SSE"
+  "pextrw\\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sse")])
+
+;; pshufw: word shuffle controlled by the immediate (unspec 41).
+(define_insn "mmx_pshufw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0")
+		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+		      (match_operand:SI 3 "immediate_operand" "i")] 41))]
+  "TARGET_SSE"
+  "pshufw\\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX mask-generating comparisons
+
+;; MMX element-wise compares producing all-ones/all-zeros masks.
+;; Only EQ and signed GT have hardware encodings.
+
+(define_insn "eqv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(eq:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		 (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "eqv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(eq:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "eqv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(eq:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		 (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpeqd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; Signed greater-than compares.
+(define_insn "gtv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(gt:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		 (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "gtv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(gt:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "gtv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(gt:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		 (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_MMX"
+  "pcmpgtd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+ [(set_attr "type" "mmx")])
+
+
+;; MMX max/min insns
+
+;; MMX-register min/max (SSE extensions, hence TARGET_SSE): unsigned
+;; for bytes, signed for words -- the only combinations the hardware
+;; provides.
+
+(define_insn "umaxv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(umax:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pmaxub\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "smaxv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(smax:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pmaxsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "uminv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(umin:V8QI (match_operand:V8QI 1 "register_operand" "0")
+		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pminub\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+(define_insn "sminv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(smin:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+  "TARGET_SSE"
+  "pminsw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")])
+
+
+;; MMX shifts
+
+;; MMX shifts.  The count is DImode and may live in an MMX register
+;; or be an immediate ("yi"); nonmemory_operand covers both.  There
+;; is no arithmetic right shift of the full 64-bit quantity, hence no
+;; ashr DImode pattern.
+
+(define_insn "ashrv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psraw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ashrv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrad\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "lshrv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrlw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "lshrv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrld\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; See logical MMX insns: the mmx_ prefix keeps generic DImode shifts
+;; from being routed through MMX registers.
+(define_insn "mmx_lshrdi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+	(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+		     (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psrlq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ashlv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(ashift:V4HI (match_operand:V4HI 1 "register_operand" "0")
+		     (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psllw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "ashlv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(ashift:V2SI (match_operand:V2SI 1 "register_operand" "0")
+		     (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "pslld\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; See logical MMX insns.
+(define_insn "mmx_ashldi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+	(ashift:DI (match_operand:DI 1 "register_operand" "0")
+		   (match_operand:DI 2 "nonmemory_operand" "yi")))]
+  "TARGET_MMX"
+  "psllq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; MMX pack/unpack insns.
+
+;; MMX pack: narrow two source vectors with saturation and
+;; concatenate the halves (operand 1 supplies the low half of the
+;; result, operand 2 the high half).
+
+(define_insn "mmx_packsswb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(vec_concat:V8QI
+	 (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
+	 (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
+  "TARGET_MMX"
+  "packsswb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_packssdw"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(vec_concat:V4HI
+	 (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0"))
+	 (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
+  "TARGET_MMX"
+  "packssdw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; packuswb saturates the signed words to the unsigned byte range.
+(define_insn "mmx_packuswb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(vec_concat:V8QI
+	 (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
+	 (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
+  "TARGET_MMX"
+  "packuswb\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+;; MMX interleaves.  Each element width has its own mnemonic:
+;; punpck{h,l}bw for bytes, punpck{h,l}wd for words, punpck{h,l}dq
+;; for dwords.  The word and dword patterns previously emitted the
+;; *byte* mnemonic (a copy-paste error), producing the wrong
+;; interleave at run time; the templates below use the correct
+;; per-width mnemonics.
+
+(define_insn "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(vec_merge:V8QI
+	 (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
+			  (parallel [(const_int 4)
+				     (const_int 0)
+				     (const_int 5)
+				     (const_int 1)
+				     (const_int 6)
+				     (const_int 2)
+				     (const_int 7)
+				     (const_int 3)]))
+	 (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+			  (parallel [(const_int 0)
+				     (const_int 4)
+				     (const_int 1)
+				     (const_int 5)
+				     (const_int 2)
+				     (const_int 6)
+				     (const_int 3)
+				     (const_int 7)]))
+	 (const_int 85)))]
+  "TARGET_MMX"
+  "punpckhbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_punpckhwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(vec_merge:V4HI
+	 (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
+			  (parallel [(const_int 0)
+				     (const_int 2)
+				     (const_int 1)
+				     (const_int 3)]))
+	 (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+			  (parallel [(const_int 2)
+				     (const_int 0)
+				     (const_int 3)
+				     (const_int 1)]))
+	 (const_int 5)))]
+  "TARGET_MMX"
+  "punpckhwd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_punpckhdq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_merge:V2SI
+	 (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
+			  (parallel [(const_int 0)
+				     (const_int 1)]))
+	 (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+			  (parallel [(const_int 1)
+				     (const_int 0)]))
+	 (const_int 1)))]
+  "TARGET_MMX"
+  "punpckhdq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_punpcklbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+	(vec_merge:V8QI
+	 (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
+			  (parallel [(const_int 0)
+				     (const_int 4)
+				     (const_int 1)
+				     (const_int 5)
+				     (const_int 2)
+				     (const_int 6)
+				     (const_int 3)
+				     (const_int 7)]))
+	 (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+			  (parallel [(const_int 4)
+				     (const_int 0)
+				     (const_int 5)
+				     (const_int 1)
+				     (const_int 6)
+				     (const_int 2)
+				     (const_int 7)
+				     (const_int 3)]))
+	 (const_int 85)))]
+  "TARGET_MMX"
+  "punpcklbw\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_punpcklwd"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+	(vec_merge:V4HI
+	 (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
+			  (parallel [(const_int 2)
+				     (const_int 0)
+				     (const_int 3)
+				     (const_int 1)]))
+	 (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+			  (parallel [(const_int 0)
+				     (const_int 2)
+				     (const_int 1)
+				     (const_int 3)]))
+	 (const_int 5)))]
+  "TARGET_MMX"
+  "punpcklwd\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+(define_insn "mmx_punpckldq"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+	(vec_merge:V2SI
+	 (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
+			  (parallel [(const_int 1)
+				     (const_int 0)]))
+	 (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+			  (parallel [(const_int 0)
+				     (const_int 1)]))
+	 (const_int 1)))]
+  "TARGET_MMX"
+  "punpckldq\\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmx")])
+
+
+;; Miscellaneous stuff
+
+;; emms: leave MMX state.  Unspec_volatile 31; clobbers the eight
+;; x87 stack registers (XFmode regs 8-15) and eight DImode registers
+;; 27-34 -- presumably the fixed MMX register numbers from i386.h;
+;; confirm against the register layout there.
+(define_insn "emms"
+  [(unspec_volatile [(const_int 0)] 31)
+   (clobber (reg:XF 8))
+   (clobber (reg:XF 9))
+   (clobber (reg:XF 10))
+   (clobber (reg:XF 11))
+   (clobber (reg:XF 12))
+   (clobber (reg:XF 13))
+   (clobber (reg:XF 14))
+   (clobber (reg:XF 15))
+   (clobber (reg:DI 27))
+   (clobber (reg:DI 28))
+   (clobber (reg:DI 29))
+   (clobber (reg:DI 30))
+   (clobber (reg:DI 31))
+   (clobber (reg:DI 32))
+   (clobber (reg:DI 33))
+   (clobber (reg:DI 34))]
+  "TARGET_MMX"
+  "emms"
+  [(set_attr "type" "mmx")])
+
+;; Load/store the SSE control/status register (unspecs 37 and 40).
+;; These are SSE instructions, so they are guarded by TARGET_SSE and
+;; typed "sse"; the previous TARGET_MMX/"mmx" guard would have let
+;; them be emitted on MMX-only machines that lack the instruction.
+
+(define_insn "ldmxcsr"
+  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 37)]
+  "TARGET_SSE"
+  "ldmxcsr\\t%0"
+  [(set_attr "type" "sse")])
+
+(define_insn "stmxcsr"
+  [(set (match_operand:SI 0 "memory_operand" "=m")
+	(unspec_volatile:SI [(const_int 0)] 40))]
+  "TARGET_SSE"
+  "stmxcsr\\t%0"
+  [(set_attr "type" "sse")])
+
+;; sfence: store fence (unspec 44).  The expander manufactures a
+;; volatile BLKmode scratch MEM so the insn is treated as touching
+;; arbitrary memory, preventing stores from being moved across it.
+(define_expand "sfence"
+  [(set (match_dup 0)
+	(unspec:BLK [(match_dup 0)] 44))]
+  "TARGET_SSE"
+  "
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+}")
+
+(define_insn "*sfence_insn"
+  [(set (match_operand:BLK 0 "" "")
+	(unspec:BLK [(match_dup 0)] 44))]
+  "TARGET_SSE"
+  "sfence"
+  [(set_attr "type" "sse")])
+
+;; prefetch (unspec 35): operand 0 is the address, operand 1 selects
+;; the locality hint (0-2 => prefetcht0/t1/t2, 3 => prefetchnta).
+;; Operand 1 is consumed with INTVAL, so it must be a true immediate:
+;; immediate_operand with constraint "n", not the address_operand/"p"
+;; the original pattern declared (which would have allowed a
+;; non-CONST_INT rtx to reach INTVAL and crash the compiler).
+(define_insn "prefetch"
+  [(unspec [(match_operand:SI 0 "address_operand" "p")
+	    (match_operand:SI 1 "immediate_operand" "n")] 35)]
+  "TARGET_SSE"
+  "*
+{
+  switch (INTVAL (operands[1]))
+    {
+    case 0:
+      return \"prefetcht0\\t%0\";
+    case 1:
+      return \"prefetcht1\\t%0\";
+    case 2:
+      return \"prefetcht2\\t%0\";
+    case 3:
+      return \"prefetchnta\\t%0\";
+    default:
+      abort ();
+    }
+}"
+  [(set_attr "type" "sse")])
+