diff options
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- | gcc/config/i386/i386.md | 1915 |
1 files changed, 1892 insertions, 23 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 72fd72088c0..9bcd4e4219c 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -71,7 +71,25 @@ ;; 9 This is an `fnstsw' operation. ;; 10 This is a `sahf' operation. ;; 11 This is a `fstcw' operation -;; + +;; For SSE/MMX support: +;; 30 This is `fix', guaranteed to be truncating. +;; 31 This is a `emms' operation. +;; 32 This is a `maskmov' operation. +;; 33 This is a `movmsk' operation. +;; 34 This is a `non-temporal' move. +;; 35 This is a `prefetch' operation. +;; 36 This is used to distinguish COMISS from UCOMISS. +;; 37 This is a `ldmxcsr' operation. +;; 38 This is a forced `movaps' instruction (rather than whatever movti does) +;; 39 This is a forced `movups' instruction (rather than whatever movti does) +;; 40 This is a `stmxcsr' operation. +;; 41 This is a `shuffle' operation. +;; 42 This is a `rcp' operation. +;; 43 This is a `rsqsrt' operation. +;; 44 This is a `sfence' operation. +;; 45 This is a noop to prevent excessive combiner cleverness. + ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls ;; from i386.c. @@ -84,7 +102,7 @@ ;; A basic instruction type. Refinements due to arguments to be ;; provided in other attributes. 
(define_attr "type" - "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld" + "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld,sse,mmx" (const_string "other")) ;; Main data type used by the insn @@ -234,7 +252,7 @@ (const_string "store") (match_operand 1 "memory_operand" "") (const_string "load") - (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp") + (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp,sse,mmx") (match_operand 2 "memory_operand" "")) (const_string "load") (and (eq_attr "type" "icmov") @@ -1530,15 +1548,19 @@ (set_attr "length_immediate" "1")]) (define_insn "*movsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=*a,r,*a,m") - (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=*a,r,*a,m,!*y,!r") + (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,r,*y"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" "* { switch (get_attr_type (insn)) { + case TYPE_MMX: + return \"movd\\t{%1, %0|%0, %1}\"; + case TYPE_LEA: return \"lea{l}\\t{%1, %0|%0, %1}\"; + default: if (flag_pic && SYMBOLIC_CONST (operands[1])) abort(); @@ -1546,12 +1568,15 @@ } }" [(set (attr "type") - (cond [(and (ne (symbol_ref "flag_pic") (const_int 0)) + (cond [(ior (match_operand:SI 0 "mmx_reg_operand" "") + (match_operand:SI 1 "mmx_reg_operand" "")) + (const_string "mmx") + (and (ne (symbol_ref "flag_pic") (const_int 0)) (match_operand:SI 1 "symbolic_operand" "")) (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "0,*,0,*") + (set_attr "modrm" "0,*,0,*,*,*") (set_attr "mode" "SI")]) (define_insn "*swapsi" @@ -1692,7 +1717,7 @@ (set_attr "mode" "HI")]) (define_insn "*movstricthi_xor" - [(set 
(strict_low_part (match_operand:HI 0 "register_operand" "=r")) + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) (match_operand:HI 1 "const0_operand" "i")) (clobber (reg:CC 17))] "reload_completed && (!TARGET_USE_MOV0 || optimize_size)" @@ -1983,15 +2008,20 @@ "#") (define_insn "*movdi_2" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") - (match_operand:DI 1 "general_operand" "riFo,riF"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y") + (match_operand:DI 1 "general_operand" "riFo,riF,*y,m"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" - "#") + "@ + # + # + movq\\t{%1, %0|%0, %1} + movq\\t{%1, %0|%0, %1}" + [(set_attr "type" "*,*,mmx,mmx")]) (define_split [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "general_operand" ""))] - "reload_completed" + "reload_completed && ! MMX_REG_P (operands[1])" [(const_int 0)] "if (!ix86_split_long_move (operands)) abort (); DONE;") @@ -1999,7 +2029,7 @@ (define_split [(set (match_operand:DI 0 "nonimmediate_operand" "") (match_operand:DI 1 "general_operand" ""))] - "reload_completed" + "reload_completed && ! MMX_REG_P (operands[0]) && ! MMX_REG_P (operands[1])" [(set (match_dup 2) (match_dup 5)) (set (match_dup 3) (match_dup 6))] "if (ix86_split_long_move (operands)) DONE;") @@ -3967,10 +3997,55 @@ (set_attr "mode" "SI")]) (define_insn "*addsi_3" + [(set (reg 17) + (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni")) + (match_operand:SI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" + "* +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (! 
rtx_equal_p (operands[0], operands[1])) + abort (); + if (operands[2] == const1_rtx) + return \"inc{l}\\t%0\"; + else if (operands[2] == constm1_rtx) + return \"dec{l}\\t%0\"; + else + abort(); + + default: + if (! rtx_equal_p (operands[0], operands[1])) + abort (); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return \"sub{l}\\t{%2, %0|%0, %2}\"; + } + return \"add{l}\\t{%2, %0|%0, %2}\"; + } +}" + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +(define_insn "*addsi_4" [(set (reg:CC 17) - (compare:CC (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") - (match_operand:SI 2 "general_operand" "rmni,rni")) - (const_int 0))) + (compare:CC (neg:SI (match_operand:SI 2 "general_operand" "rmni,rni")) + (match_operand:SI 1 "nonimmediate_operand" "%0,0"))) (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") (plus:SI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, SImode, operands) @@ -3981,6 +4056,19 @@ [(set_attr "type" "alu") (set_attr "mode" "SI")]) +(define_insn "*addsi_5" + [(set (reg:CC 17) + (compare:CC (neg:SI (match_operand:SI 2 "general_operand" "rmni")) + (match_operand:SI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:SI 0 "=r"))] + "(GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! 
pic_symbolic_operand (operands[2], VOIDmode)" + "add{l}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + (define_expand "addhi3" [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "") (plus:HI (match_operand:HI 1 "nonimmediate_operand" "") @@ -4121,10 +4209,49 @@ (set_attr "mode" "HI")]) (define_insn "*addhi_3" + [(set (reg 17) + (compare (neg:HI (match_operand:HI 2 "general_operand" "rmni")) + (match_operand:HI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return \"inc{w}\\t%0\"; + else if (operands[2] == constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 65535)) + return \"dec{w}\\t%0\"; + abort(); + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return \"sub{w}\\t{%2, %0|%0, %2}\"; + } + return \"add{w}\\t{%2, %0|%0, %2}\"; + } +}" + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +(define_insn "*addhi_4" [(set (reg:CC 17) - (compare:CC (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") - (match_operand:HI 2 "general_operand" "rmni,rni")) - (const_int 0))) + (compare:CC (neg:HI (match_operand:HI 2 "general_operand" "rmni,rni")) + (match_operand:HI 1 "nonimmediate_operand" "%0,0"))) (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") (plus:HI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, HImode, operands)" @@ -4132,6 +4259,16 @@ [(set_attr "type" "alu") (set_attr "mode" "HI")]) +(define_insn "*addhi_5" + [(set (reg:CC 17) + (compare:CC (neg:HI (match_operand:HI 2 "general_operand" "rmni")) + (match_operand:HI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:HI 0 "=r"))] + "(GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "add{w}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + (define_expand "addqi3" [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "") (plus:QI (match_operand:QI 1 "nonimmediate_operand" "") @@ -4280,10 +4417,46 @@ (set_attr "mode" "QI")]) (define_insn "*addqi_3" + [(set (reg 17) + (compare (neg:QI (match_operand:QI 2 "general_operand" "qmni")) + (match_operand:QI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:QI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "* +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return \"inc{b}\\t%0\"; + else if (operands[2] == 
constm1_rtx + || (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) == 255)) + return \"dec{b}\\t%0\"; + abort(); + + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ + if (GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return \"sub{b}\\t{%2, %0|%0, %2}\"; + } + return \"add{b}\\t{%2, %0|%0, %2}\"; + } +}" + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_4" [(set (reg:CC 17) - (compare:CC (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") - (match_operand:QI 2 "general_operand" "qmni,qni")) - (const_int 0))) + (compare:CC (neg:QI (match_operand:QI 2 "general_operand" "qmni,qni")) + (match_operand:QI 1 "nonimmediate_operand" "%0,0"))) (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") (plus:QI (match_dup 1) (match_dup 2)))] "ix86_binary_operator_ok (PLUS, QImode, operands)" @@ -4291,6 +4464,16 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "*addqi_5" + [(set (reg:CC 17) + (compare:CC (neg:QI (match_operand:QI 2 "general_operand" "qmni")) + (match_operand:QI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:QI 0 "=r"))] + "(GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "add{b}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + (define_insn "addqi_ext_1" [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q") @@ -4448,6 +4631,18 @@ (const_int 0))) (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") (minus:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_3" + [(set (reg 17) + (compare (match_operand:SI 1 "nonimmediate_operand" "0,0") 
+ (match_operand:SI 2 "general_operand" "ri,rm"))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (MINUS, SImode, operands)" "sub{l}\\t{%2, %0|%0, %2}" @@ -4480,6 +4675,18 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") (minus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (MINUS, HImode, operands)" + "sub{w}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*subhi_3" + [(set (reg 17) + (compare (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:HI 2 "general_operand" "ri,rm"))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (MINUS, HImode, operands)" "sub{w}\\t{%2, %0|%0, %2}" @@ -4512,6 +4719,18 @@ (const_int 0))) (set (match_operand:HI 0 "nonimmediate_operand" "=qm,q") (minus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (MINUS, QImode, operands)" + "sub{b}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*subqi_3" + [(set (reg 17) + (compare (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "general_operand" "qi,qm"))) + (set (match_operand:HI 0 "nonimmediate_operand" "=qm,q") + (minus:HI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (MINUS, QImode, operands)" "sub{b}\\t{%2, %0|%0, %2}" @@ -5461,6 +5680,18 @@ [(set_attr "type" "alu") (set_attr "mode" "SI")]) +(define_insn "*iorsi_3" + [(set (reg 17) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != 
MEM || GET_CODE (operands[2]) != MEM)" + "or{l}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + (define_expand "iorhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "") (ior:HI (match_operand:HI 1 "nonimmediate_operand" "") @@ -5492,6 +5723,18 @@ [(set_attr "type" "alu") (set_attr "mode" "HI")]) +(define_insn "*iorhi_3" + [(set (reg 17) + (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "or{w}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + (define_expand "iorqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (ior:QI (match_operand:QI 1 "nonimmediate_operand" "") @@ -5526,6 +5769,19 @@ "or{b}\\t{%2, %0|%0, %2}" [(set_attr "type" "alu") (set_attr "mode" "QI")]) + +(define_insn "*iorqi_3" + [(set (reg 17) + (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "general_operand" "qim")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "or{b}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + ;; Logical XOR instructions @@ -5563,6 +5819,18 @@ [(set_attr "type" "alu") (set_attr "mode" "SI")]) +(define_insn "*xorsi_3" + [(set (reg 17) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xor{l}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + (define_expand "xorhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "") (xor:HI (match_operand:HI 1 
"nonimmediate_operand" "") @@ -5594,6 +5862,18 @@ [(set_attr "type" "alu") (set_attr "mode" "HI")]) +(define_insn "*xorhi_3" + [(set (reg 17) + (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rim")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xor{w}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + (define_expand "xorqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "") (xor:QI (match_operand:QI 1 "nonimmediate_operand" "") @@ -5648,6 +5928,19 @@ [(set_attr "type" "alu") (set_attr "mode" "QI")]) +(define_insn "*xorqi_cc_2" + [(set (reg 17) + (compare + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "general_operand" "qim")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" + "xor{b}\\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + (define_insn "xorqi_cc_ext_1" [(set (reg:CCNO 17) (compare:CCNO @@ -7864,7 +8157,7 @@ [(set_attr "type" "setcc") (set_attr "mode" "QI")]) -(define_insn "*setcc_4" +(define_insn "setcc_4" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) (match_operator:QI 1 "uno_comparison_operator" [(reg:CC 17) (const_int 0)]))] @@ -11170,3 +11463,1579 @@ CODE_LABEL_NUMBER (operands[2])); RET; }") + + ;; Pentium III SIMD instructions. + +;; Moves for SSE/MMX regs. + +(define_insn "movv4sf_internal" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (match_operand:V4SF 1 "general_operand" "xm,x"))] + "TARGET_SSE" + ;; @@@ let's try to use movaps here. 
+ "movaps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "movv4si_internal" + [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") + (match_operand:V4SI 1 "general_operand" "xm,x"))] + "TARGET_SSE" + ;; @@@ let's try to use movaps here. + "movaps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "movv8qi_internal" + [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") + (match_operand:V8QI 1 "general_operand" "ym,y"))] + "TARGET_MMX" + "movq\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_insn "movv4hi_internal" + [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m") + (match_operand:V4HI 1 "general_operand" "ym,y"))] + "TARGET_MMX" + "movq\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_insn "movv2si_internal" + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m") + (match_operand:V2SI 1 "general_operand" "ym,y"))] + "TARGET_MMX" + "movq\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_expand "movti" + [(set (match_operand:TI 0 "general_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "TARGET_SSE" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], TImode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (TImode, addr); + } + + /* Make operand1 a register if it isn't already. 
*/ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], TImode) + && !register_operand (operands[1], TImode) + && operands[1] != CONST0_RTX (TImode)) + { + rtx temp = force_reg (TImode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_expand "movv4sf" + [(set (match_operand:V4SF 0 "general_operand" "") + (match_operand:V4SF 1 "general_operand" ""))] + "TARGET_SSE" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V4SFmode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V4SFmode, addr); + } + + /* Make operand1 a register if it isn't already. */ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V4SFmode) + && !register_operand (operands[1], V4SFmode) + && operands[1] != CONST0_RTX (V4SFmode)) + { + rtx temp = force_reg (V4SFmode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_expand "movv4si" + [(set (match_operand:V4SI 0 "general_operand" "") + (match_operand:V4SI 1 "general_operand" ""))] + "TARGET_MMX" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. 
*/ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V4SImode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V4SImode, addr); + } + + /* Make operand1 a register if it isn't already. */ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V4SImode) + && !register_operand (operands[1], V4SImode) + && operands[1] != CONST0_RTX (V4SImode)) + { + rtx temp = force_reg (V4SImode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_expand "movv2si" + [(set (match_operand:V2SI 0 "general_operand" "") + (match_operand:V2SI 1 "general_operand" ""))] + "TARGET_MMX" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V2SImode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V2SImode, addr); + } + + /* Make operand1 a register if it isn't already. */ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V2SImode) + && !register_operand (operands[1], V2SImode) + && operands[1] != CONST0_RTX (V2SImode)) + { + rtx temp = force_reg (V2SImode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_expand "movv4hi" + [(set (match_operand:V4HI 0 "general_operand" "") + (match_operand:V4HI 1 "general_operand" ""))] + "TARGET_MMX" + " +{ + /* For constants other than zero into memory. 
We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V4HImode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V4HImode, addr); + } + + /* Make operand1 a register if it isn't already. */ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V4HImode) + && !register_operand (operands[1], V4HImode) + && operands[1] != CONST0_RTX (V4HImode)) + { + rtx temp = force_reg (V4HImode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_expand "movv8qi" + [(set (match_operand:V8QI 0 "general_operand" "") + (match_operand:V8QI 1 "general_operand" ""))] + "TARGET_MMX" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V8QImode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V8QImode, addr); + } + + /* Make operand1 a register if it isn't already. 
*/ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V8QImode) + && !register_operand (operands[1], V8QImode) + && operands[1] != CONST0_RTX (V8QImode)) + { + rtx temp = force_reg (V8QImode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_insn_and_split "*pushti" + [(set (match_operand:TI 0 "push_operand" "=<") + (match_operand:TI 1 "nonmemory_operand" "x"))] + "TARGET_SSE" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:TI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "sse")]) + +(define_insn_and_split "*pushv4sf" + [(set (match_operand:V4SF 0 "push_operand" "=<") + (match_operand:V4SF 1 "nonmemory_operand" "x"))] + "TARGET_SSE" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V4SF (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "sse")]) + +(define_insn_and_split "*pushv4si" + [(set (match_operand:V4SI 0 "push_operand" "=<") + (match_operand:V4SI 1 "nonmemory_operand" "x"))] + "TARGET_SSE" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V4SI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "sse")]) + +(define_insn_and_split "*pushv2si" + [(set (match_operand:V2SI 0 "push_operand" "=<") + (match_operand:V2SI 1 "nonmemory_operand" "y"))] + "TARGET_MMX" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (set (mem:V2SI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "mmx")]) + +(define_insn_and_split "*pushv4hi" + [(set (match_operand:V4HI 0 "push_operand" "=<") + (match_operand:V4HI 1 "nonmemory_operand" "y"))] + "TARGET_MMX" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (set (mem:V4HI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "mmx")]) + +(define_insn_and_split "*pushv8qi" + [(set (match_operand:V8QI 0 "push_operand" "=<") + (match_operand:V8QI 1 "nonmemory_operand" "y"))] + "TARGET_MMX" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 
7) (const_int -8))) + (set (mem:V8QI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "mmx")]) + +(define_insn "movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m") + (match_operand:TI 1 "general_operand" "xm,x"))] + "TARGET_SSE" + "@ + movaps\\t{%1, %0|%0, %1} + movaps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +;; These two patterns are useful for specifying exactly whether to use +;; movaps or movups +(define_insn "sse_movaps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))] + "TARGET_SSE" + "@ + movaps\\t{%1, %0|%0, %1} + movaps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "sse_movups" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))] + "TARGET_SSE" + "@ + movups\\t{%1, %0|%0, %1} + movups\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + + +;; SSE Strange Moves. + +(define_insn "sse_movmskps" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] 33))] + "TARGET_SSE" + "movmskps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "mmx_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))] + "TARGET_SSE" + "pmovmskb\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "mmx_maskmovq" + [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] 32))] + "TARGET_SSE" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovq\\t{%2, %1|%1, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_movntv4sf" + [(set (match_operand:V4SF 0 "memory_operand" "=m") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] 34))] + "TARGET_SSE" + "movntps\\t{%1, %0|%0, %1}" + 
[(set_attr "type" "sse")])

;; Non-temporal 64-bit store (unspec 34).  movntq takes an MMX register as
;; its source, so operand 1 uses the "y" constraint.
(define_insn "sse_movntdi"
  [(set (match_operand:DI 0 "memory_operand" "=m")
        (unspec:DI [(match_operand:DI 1 "register_operand" "y")] 34))]
  "TARGET_SSE"
  "movntq\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

;; Register-to-register half moves.  Operand 1 is tied to the destination;
;; operand 2 supplies the half that is merged in.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "register_operand" "0")
         (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
                          (parallel [(const_int 2)
                                     (const_int 3)
                                     (const_int 0)
                                     (const_int 1)]))
         (const_int 3)))]
  "TARGET_SSE"
  "movhlps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "register_operand" "0")
         (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
                          (parallel [(const_int 2)
                                     (const_int 3)
                                     (const_int 0)
                                     (const_int 1)]))
         (const_int 12)))]
  "TARGET_SSE"
  "movlhps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; Half moves to or from memory.  The insn condition requires that one of
;; operands 1 and 2 is a MEM.
(define_insn "sse_movhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
         (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
         (const_int 12)))]
  "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
  "movhps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_movlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
         (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
         (const_int 3)))]
  "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
  "movlps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; Scalar single-precision load, register move and store, all via movss.
(define_insn "sse_loadss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "memory_operand" "m")
         (vec_duplicate:V4SF (float:SF (const_int 0)))
         (const_int 1)))]
  "TARGET_SSE"
  "movss\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "register_operand" "0")
         (match_operand:V4SF 2 "register_operand" "x")
         (const_int 1)))]
  "TARGET_SSE"
  "movss\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_storess"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (vec_select:SF
         (match_operand:V4SF 1 "register_operand" "x")
         (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "movss\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

;; Shuffle (unspec 41); operand 3 is the immediate selector.
(define_insn "sse_shufps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                      (match_operand:SI 3 "immediate_operand" "i")] 41))]
  "TARGET_SSE"
  ;; @@@ check operand order for intel/nonintel syntax
  "shufps\\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")])


;; SSE arithmetic
;; Each operation comes in a packed form (xxxv4sf3) and a "vm" form in which
;; only element 0 takes the result and the other elements are copied from
;; operand 1 (vec_merge with mask 1).

(define_insn "addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "addps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "vmaddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "addss\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "subps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "vmsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "subss\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "mulps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "vmmulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "mulss\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "divps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "vmdivv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "divss\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])


;; SSE square root/reciprocal
;; The source operand uses nonimmediate_operand so that the predicate agrees
;; with the "xm" constraint.  Unspec 42 is rcp, unspec 43 is rsqrt.

(define_insn "rcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
  "TARGET_SSE"
  "rcpps\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
         (match_operand:V4SF 2 "register_operand" "0")
         (const_int 1)))]
  "TARGET_SSE"
  "rcpss\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
  "TARGET_SSE"
  "rsqrtps\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
         (match_operand:V4SF 2 "register_operand" "0")
         (const_int 1)))]
  "TARGET_SSE"
  "rsqrtss\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "sqrtps\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "vmsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
         (match_operand:V4SF 2 "register_operand" "0")
         (const_int 1)))]
  "TARGET_SSE"
  "sqrtss\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])


;; SSE logical operations.

;; These are not called andti3 etc. because we really really don't want
;; the compiler to widen DImode ands to TImode ands and then try to move
;; into DImode subregs of SSE registers, AND them together, and move out
;; of DImode subregs again!

;; TImode bitwise operations on SSE registers.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.

(define_insn "sse_andti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (and:TI (match_operand:TI 1 "register_operand" "0")
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; (~op1) & op2.
(define_insn "sse_nandti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andnps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; The SSE mnemonic for inclusive or is "orps"; there is no "iorps".
(define_insn "sse_iorti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (ior:TI (match_operand:TI 1 "register_operand" "0")
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "orps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_xorti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (xor:TI (match_operand:TI 1 "register_operand" "0")
                (match_operand:TI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "xorps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; Use xor, but don't show input operands so they aren't live before
;; this insn.
;; Clear an SSE register by xoring it with itself.  The source is an unspec
;; (45), so the pattern has no input operands.
(define_insn "sse_clrti"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (unspec:TI [(const_int 0)] 45))]
  "TARGET_SSE"
  "xorps\\t{%0, %0|%0, %0}"
  [(set_attr "type" "sse")])


;; SSE mask-generating compares
;; Operand 3 is the comparison operator; the output code picks the mnemonic
;; from its rtx code and aborts on anything other than EQ, LT, LE or
;; UNORDERED.  The "n" variants match the negated comparison.

(define_insn "maskcmpv4sf3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (match_operator:V4SI 3 "sse_comparison_operator"
         [(match_operand:V4SF 1 "register_operand" "0")
          (match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
  "TARGET_SSE"
  "*
{
  switch (GET_CODE (operands[3]))
    {
    case EQ:
      return \"cmpeqps\\t{%2, %0|%0, %2}\";
    case LT:
      return \"cmpltps\\t{%2, %0|%0, %2}\";
    case LE:
      return \"cmpleps\\t{%2, %0|%0, %2}\";
    case UNORDERED:
      return \"cmpunordps\\t{%2, %0|%0, %2}\";
    default:
      abort ();
    }
}"
  [(set_attr "type" "sse")])

(define_insn "maskncmpv4sf3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (not:V4SI
         (match_operator:V4SI 3 "sse_comparison_operator"
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
  "TARGET_SSE"
  "*
{
  switch (GET_CODE (operands[3]))
    {
    case EQ:
      return \"cmpneqps\\t{%2, %0|%0, %2}\";
    case LT:
      return \"cmpnltps\\t{%2, %0|%0, %2}\";
    case LE:
      return \"cmpnleps\\t{%2, %0|%0, %2}\";
    case UNORDERED:
      return \"cmpordps\\t{%2, %0|%0, %2}\";
    default:
      abort ();
    }
}"
  [(set_attr "type" "sse")])

;; NOTE(review): the merge source here is (match_dup 1), a V4SF operand
;; inside a V4SI vec_merge, whereas vmmaskncmpv4sf3 wraps it in a V4SI
;; subreg -- confirm which form the callers generate.
(define_insn "vmmaskcmpv4sf3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_merge:V4SI
         (match_operator:V4SI 3 "sse_comparison_operator"
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "x")])
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "*
{
  switch (GET_CODE (operands[3]))
    {
    case EQ:
      return \"cmpeqss\\t{%2, %0|%0, %2}\";
    case LT:
      return \"cmpltss\\t{%2, %0|%0, %2}\";
    case LE:
      return \"cmpless\\t{%2, %0|%0, %2}\";
    case UNORDERED:
      return \"cmpunordss\\t{%2, %0|%0, %2}\";
    default:
      abort ();
    }
}"
  [(set_attr "type" "sse")])

(define_insn "vmmaskncmpv4sf3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_merge:V4SI
         (not:V4SI
          (match_operator:V4SI 3 "sse_comparison_operator"
           [(match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "x")]))
         (subreg:V4SI (match_dup 1) 0)
         (const_int 1)))]
  "TARGET_SSE"
  "*
{
  switch (GET_CODE (operands[3]))
    {
    case EQ:
      return \"cmpneqss\\t{%2, %0|%0, %2}\";
    case LT:
      return \"cmpnltss\\t{%2, %0|%0, %2}\";
    case LE:
      return \"cmpnless\\t{%2, %0|%0, %2}\";
    case UNORDERED:
      return \"cmpordss\\t{%2, %0|%0, %2}\";
    default:
      abort ();
    }
}"
  [(set_attr "type" "sse")])

;; Scalar compares that set the flags register (hard reg 17).  The two
;; values compared are operands 0 and 1; operand 2 is the comparison
;; operator itself and must not be printed as an instruction operand.
(define_insn "sse_comi"
  [(set (reg:CCFP 17)
        (match_operator:CCFP 2 "sse_comparison_operator"
         [(vec_select:SF
           (match_operand:V4SF 0 "register_operand" "x")
           (parallel [(const_int 0)]))
          (vec_select:SF
           (match_operand:V4SF 1 "register_operand" "x")
           (parallel [(const_int 0)]))]))]
  "TARGET_SSE"
  "comiss\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "sse_ucomi"
  [(set (reg:CCFPU 17)
        (match_operator:CCFPU 2 "sse_comparison_operator"
         [(vec_select:SF
           (match_operand:V4SF 0 "register_operand" "x")
           (parallel [(const_int 0)]))
          (vec_select:SF
           (match_operand:V4SF 1 "register_operand" "x")
           (parallel [(const_int 0)]))]))]
  "TARGET_SSE"
  "ucomiss\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])


;; SSE unpack
;; Both inputs are V4SF vectors; operand 2 lives in an SSE register ("x"),
;; so its mode must be V4SF to match the enclosing V4SF vec_merge.

(define_insn "sse_unpckhps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
                          (parallel [(const_int 2)
                                     (const_int 0)
                                     (const_int 3)
                                     (const_int 1)]))
         (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
                          (parallel [(const_int 0)
                                     (const_int 2)
                                     (const_int 1)
                                     (const_int 3)]))
         (const_int 5)))]
  "TARGET_SSE"
  "unpckhps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sse_unpcklps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
                          (parallel [(const_int 0)
                                     (const_int 2)
                                     (const_int 1)
                                     (const_int 3)]))
         (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
                          (parallel [(const_int 2)
                                     (const_int 0)
                                     (const_int 3)
                                     (const_int 1)]))
         (const_int 5)))]
  "TARGET_SSE"
  "unpcklps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])


;; SSE min/max

(define_insn "smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "maxps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "vmsmaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "maxss\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "minps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "vmsminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "minss\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])


;; SSE <-> integer/MMX conversions

;; Operand 2 uses nonimmediate_operand so that the predicate agrees with
;; the "ym" constraint.
(define_insn "cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "register_operand" "0")
         (vec_duplicate:V4SF
          (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
         (const_int 12)))]
  "TARGET_SSE"
  "cvtpi2ps\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])
;; Source operands below use nonimmediate_operand so that the predicate
;; agrees with the "xm"/"rm" constraints.  Unspec 30 is the truncating fix.

(define_insn "cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
         (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
         (parallel [(const_int 0)
                    (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
         (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
         (parallel [(const_int 0)
                    (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

;; Only element 0 takes the converted value, so the merge mask keeps
;; elements 1-3 of operand 1 (mask 14).  A mask of 15 would take every
;; element from operand 1 and drop the conversion result.
(define_insn "cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operand:V4SF 1 "register_operand" "0")
         (vec_duplicate:V4SF
          (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
         (const_int 14)))]
  "TARGET_SSE"
  "cvtsi2ss\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; cvtss2si and cvttss2si write a general-purpose register, so the SImode
;; destination uses "=r" rather than the MMX class "y".
(define_insn "cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (vec_select:SI
         (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
         (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "cvtss2si\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

(define_insn "cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (vec_select:SI
         (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
         (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "cvttss2si\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])


;; MMX insns

;; MMX arithmetic
;; All of these are two-address: operand 1 is tied to the destination and
;; operand 2 may be an MMX register or memory.

(define_insn "addv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "addv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
                   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "addv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (plus:V2SI (match_operand:V2SI 1 "register_operand" "0")
                   (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ssaddv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddsb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ssaddv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddsw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "usaddv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddusb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "usaddv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddusw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "subv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
                    (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "subv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "subv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (minus:V2SI (match_operand:V2SI 1 "register_operand" "0")
                    (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "sssubv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
                       (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubsb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "sssubv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
                       (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubsw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ussubv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
                       (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubusb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ussubv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
                       (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubusw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mulv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (mult:V4HI (match_operand:V4HI 1 "register_operand" "0")
                   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pmullw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; High 16 bits of each signed 16x16 product.
(define_insn "smulv4hi3_highpart"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
         (lshiftrt:V4SI
          (mult:V4SI
           (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
           (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
          (const_int 16))))]
  "TARGET_MMX"
  "pmulhw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; High 16 bits of each unsigned 16x16 product.  pmulhuw is one of the
;; MMX-register instructions added with SSE, so it is conditioned on
;; TARGET_SSE like the other such insns in this file.
(define_insn "umulv4hi3_highpart"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
         (lshiftrt:V4SI
          (mult:V4SI
           (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
           (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
          (const_int 16))))]
  "TARGET_SSE"
  "pmulhuw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; Sum of two V2SI products: one built from elements 0 and 2 of each input,
;; the other from elements 1 and 3.
(define_insn "mmx_pmaddwd"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (plus:V2SI
         (mult:V2SI
          (sign_extend:V2SI
           (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
                            (parallel [(const_int 0)
                                       (const_int 2)])))
          (sign_extend:V2SI
           (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                            (parallel [(const_int 0)
                                       (const_int 2)]))))
         (mult:V2SI
          (sign_extend:V2SI
           (vec_select:V2HI (match_dup 1)
                            (parallel [(const_int 1)
                                       (const_int 3)])))
          (sign_extend:V2SI
           (vec_select:V2HI (match_dup 2)
                            (parallel [(const_int 1)
                                       (const_int 3)]))))))]
  "TARGET_MMX"
  "pmaddwd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])


;; MMX logical operations
;; Note we don't want to declare these as regular iordi3 insns to prevent
;; normal code that also wants to use the FPU from getting broken.
;; The UNSPECs are there to prevent the combiner from getting overly clever.
(define_insn "mmx_iordi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
         [(ior:DI (match_operand:DI 1 "register_operand" "0")
                  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
  "TARGET_MMX"
  "por\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_xordi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
         [(xor:DI (match_operand:DI 1 "register_operand" "0")
                  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
  "TARGET_MMX"
  "pxor\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; Same as pxor, but don't show input operands so that we don't think
;; they are live.
;; Clear an MMX register by xoring it with itself.  The source is an unspec
;; (45), so the pattern has no input operands.
(define_insn "mmx_clrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI [(const_int 0)] 45))]
  "TARGET_MMX"
  "pxor\\t{%0, %0|%0, %0}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_anddi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
         [(and:DI (match_operand:DI 1 "register_operand" "0")
                  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
  "TARGET_MMX"
  "pand\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; (~op1) & op2.
(define_insn "mmx_nanddi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
         [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0"))
                  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
  "TARGET_MMX"
  "pandn\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])


;; MMX unsigned averages/sum of absolute differences
;; The averages are modelled as (op1 + op2 + 1) >> 1 per element.  The
;; mnemonics are pavgb, pavgw and psadbw.

(define_insn "mmx_uavgv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (ashiftrt:V8QI
         (plus:V8QI
          (plus:V8QI
           (match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
          (vec_const:V8QI (parallel [(const_int 1)
                                     (const_int 1)
                                     (const_int 1)
                                     (const_int 1)
                                     (const_int 1)
                                     (const_int 1)
                                     (const_int 1)
                                     (const_int 1)])))
         (const_int 1)))]
  "TARGET_SSE"
  "pavgb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "mmx_uavgv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ashiftrt:V4HI
         (plus:V4HI
          (plus:V4HI
           (match_operand:V4HI 1 "register_operand" "0")
           (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
          (vec_const:V4HI (parallel [(const_int 1)
                                     (const_int 1)
                                     (const_int 1)
                                     (const_int 1)])))
         (const_int 1)))]
  "TARGET_SSE"
  "pavgw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; NOTE(review): the inputs are DImode inside a V8QI minus -- confirm that
;; this matches what the callers generate.
(define_insn "mmx_psadbw"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (abs:V8QI (minus:V8QI (match_operand:DI 1 "register_operand" "0")
                              (match_operand:DI 2 "nonimmediate_operand" "ym"))))]
  "TARGET_SSE"
  "psadbw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])


;; MMX insert/extract/shuffle
;; The immediate is part of the operand list, so it must sit inside the
;; {att|intel} alternation: first in AT&T order, last in Intel order (the
;; same layout sse_shufps uses).

(define_insn "mmx_pinsrw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_merge:V4HI
         (match_operand:V4HI 1 "register_operand" "0")
         (vec_duplicate:V4HI
          (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm")))
         (match_operand:SI 3 "immediate_operand" "i")))]
  "TARGET_SSE"
  "pinsrw\\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")])

(define_insn "mmx_pextrw"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (zero_extend:SI
         (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
                        (parallel
                         [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SSE"
  "pextrw\\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sse")])

(define_insn "mmx_pshufw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0")
                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                      (match_operand:SI 3 "immediate_operand" "i")] 41))]
  "TARGET_SSE"
  "pshufw\\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")])


;; MMX mask-generating comparisons

(define_insn "eqv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (eq:V8QI (match_operand:V8QI 1 "register_operand" "0")
                 (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpeqb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "eqv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (eq:V4HI (match_operand:V4HI 1 "register_operand" "0")
                 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpeqw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "eqv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (eq:V2SI (match_operand:V2SI 1 "register_operand" "0")
                 (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpeqd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "gtv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (gt:V8QI (match_operand:V8QI 1 "register_operand" "0")
                 (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpgtb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "gtv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (gt:V4HI (match_operand:V4HI 1 "register_operand" "0")
                 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpgtw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "gtv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (gt:V2SI (match_operand:V2SI 1 "register_operand" "0")
                 (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpgtd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])


;; MMX max/min insns
;; These are conditioned on TARGET_SSE and carry type "sse".

(define_insn "umaxv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (umax:V8QI (match_operand:V8QI 1 "register_operand" "0")
                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_SSE"
  "pmaxub\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "smaxv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (smax:V4HI (match_operand:V4HI 1 "register_operand" "0")
                   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_SSE"
  "pmaxsw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "uminv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (umin:V8QI (match_operand:V8QI 1 "register_operand" "0")
                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_SSE"
  "pminub\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sminv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (smin:V4HI (match_operand:V4HI 1 "register_operand" "0")
                   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_SSE"
  "pminsw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])


;; MMX shifts
;; The shift count (operand 2) is DImode and may be an MMX register or an
;; immediate.

(define_insn "ashrv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
                       (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psraw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ashrv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
                       (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psrad\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "lshrv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
                       (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psrlw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "lshrv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
                       (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psrld\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; See logical MMX insns.
(define_insn "mmx_lshrdi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
                     (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psrlq\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ashlv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0")
                     (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psllw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ashlv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0")
                     (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "pslld\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; See logical MMX insns.
;; 64-bit left shift of an MMX register; the count is an MMX register or an
;; immediate.
(define_insn "mmx_ashldi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (ashift:DI (match_operand:DI 1 "register_operand" "0")
                   (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psllq\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])


;; MMX pack/unpack insns.

(define_insn "mmx_packsswb"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (vec_concat:V8QI
         (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
         (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
  "TARGET_MMX"
  "packsswb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_packssdw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
         (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0"))
         (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
  "TARGET_MMX"
  "packssdw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_packuswb"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (vec_concat:V8QI
         (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
         (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
  "TARGET_MMX"
  "packuswb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; Interleaving unpacks.  The mnemonic suffix follows the element size of
;; the pattern: bw for V8QI, wd for V4HI, dq for V2SI.

(define_insn "mmx_punpckhbw"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (vec_merge:V8QI
         (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
                          (parallel [(const_int 4)
                                     (const_int 0)
                                     (const_int 5)
                                     (const_int 1)
                                     (const_int 6)
                                     (const_int 2)
                                     (const_int 7)
                                     (const_int 3)]))
         (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
                          (parallel [(const_int 0)
                                     (const_int 4)
                                     (const_int 1)
                                     (const_int 5)
                                     (const_int 2)
                                     (const_int 6)
                                     (const_int 3)
                                     (const_int 7)]))
         (const_int 85)))]
  "TARGET_MMX"
  "punpckhbw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_punpckhwd"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_merge:V4HI
         (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
                          (parallel [(const_int 0)
                                     (const_int 2)
                                     (const_int 1)
                                     (const_int 3)]))
         (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
                          (parallel [(const_int 2)
                                     (const_int 0)
                                     (const_int 3)
                                     (const_int 1)]))
         (const_int 5)))]
  "TARGET_MMX"
  "punpckhwd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_punpckhdq"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_merge:V2SI
         (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
                          (parallel [(const_int 0)
                                     (const_int 1)]))
         (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
                          (parallel [(const_int 1)
                                     (const_int 0)]))
         (const_int 1)))]
  "TARGET_MMX"
  "punpckhdq\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_punpcklbw"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (vec_merge:V8QI
         (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
                          (parallel [(const_int 0)
                                     (const_int 4)
                                     (const_int 1)
                                     (const_int 5)
                                     (const_int 2)
                                     (const_int 6)
                                     (const_int 3)
                                     (const_int 7)]))
         (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
                          (parallel [(const_int 4)
                                     (const_int 0)
                                     (const_int 5)
                                     (const_int 1)
                                     (const_int 6)
                                     (const_int 2)
                                     (const_int 7)
                                     (const_int 3)]))
         (const_int 85)))]
  "TARGET_MMX"
  "punpcklbw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_punpcklwd"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_merge:V4HI
         (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
                          (parallel [(const_int 2)
                                     (const_int 0)
                                     (const_int 3)
                                     (const_int 1)]))
         (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
                          (parallel [(const_int 0)
                                     (const_int 2)
                                     (const_int 1)
                                     (const_int 3)]))
         (const_int 5)))]
  "TARGET_MMX"
  "punpcklwd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_punpckldq"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_merge:V2SI
         (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
                          (parallel [(const_int 1)
                                     (const_int 0)]))
         (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
                          (parallel [(const_int 0)
                                     (const_int 1)]))
         (const_int 1)))]
  "TARGET_MMX"
  "punpckldq\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])


;; Miscellaneous stuff

;; emms (unspec_volatile 31).  The pattern clobbers hard registers 8-15 in
;; XFmode and 27-34 in DImode.
(define_insn "emms"
  [(unspec_volatile [(const_int 0)] 31)
   (clobber (reg:XF 8))
   (clobber (reg:XF 9))
   (clobber (reg:XF 10))
   (clobber (reg:XF 11))
   (clobber (reg:XF 12))
   (clobber (reg:XF 13))
   (clobber (reg:XF 14))
   (clobber (reg:XF 15))
   (clobber (reg:DI 27))
   (clobber (reg:DI 28))
   (clobber (reg:DI 29))
   (clobber (reg:DI 30))
   (clobber (reg:DI 31))
   (clobber (reg:DI 32))
   (clobber (reg:DI 33))
   (clobber (reg:DI 34))]
  "TARGET_MMX"
  "emms"
  [(set_attr "type" "mmx")])

;; Load (unspec_volatile 37) and store (unspec_volatile 40) of MXCSR.
;; NOTE(review): MXCSR was introduced with SSE, yet both patterns are
;; conditioned on TARGET_MMX -- confirm against the callers before
;; tightening the condition.
(define_insn "ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 37)]
  "TARGET_MMX"
  "ldmxcsr\\t%0"
  [(set_attr "type" "mmx")])

(define_insn "stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec_volatile:SI [(const_int 0)] 40))]
  "TARGET_MMX"
  "stmxcsr\\t%0"
  [(set_attr "type" "mmx")])

;; sfence is modelled as a set of a volatile BLKmode MEM with a scratch
;; address (unspec 44); the expander builds that MEM.
(define_expand "sfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] 44))]
  "TARGET_SSE"
  "
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
}")

(define_insn "*sfence_insn"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] 44))]
  "TARGET_SSE"
  "sfence"
  [(set_attr "type" "sse")])

;; Prefetch (unspec 35).  Operand 0 is the address to prefetch, printed with
;; %a so that it is emitted as a memory reference.  Operand 1 is a constant
;; hint read with INTVAL, so it is declared as an immediate: 0 -> t0,
;; 1 -> t1, 2 -> t2, 3 -> nta; any other value aborts.
(define_insn "prefetch"
  [(unspec [(match_operand:SI 0 "address_operand" "p")
            (match_operand:SI 1 "immediate_operand" "n")] 35)]
  "TARGET_SSE"
  "*
{
  switch (INTVAL (operands[1]))
    {
    case 0:
      return \"prefetcht0\\t%a0\";
    case 1:
      return \"prefetcht1\\t%a0\";
    case 2:
      return \"prefetcht2\\t%a0\";
    case 3:
      return \"prefetchnta\\t%a0\";
    default:
      abort ();
    }
}"
  [(set_attr "type" "sse")])