diff options
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- | gcc/config/i386/i386.md | 735 |
1 files changed, 503 insertions, 232 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index befbfe49569..1fa29985fb9 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1,5 +1,6 @@ ;; GCC machine description for IA-32 and x86-64. -;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +;; 2001, 2002, 2003 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka @@ -267,6 +268,8 @@ (define_attr "length" "" (cond [(eq_attr "type" "other,multi,fistp") (const_int 16) + (eq_attr "type" "fcmp") + (const_int 4) (eq_attr "unit" "i387") (plus (const_int 2) (plus (attr "prefix_data16") @@ -1099,25 +1102,20 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "1")]) -; The first alternative is used only to compute proper length of instruction. -; Reload's algorithm does not take into account the cost of spill instructions -; needed to free register in given class, so avoid it from choosing the first -; alternative when eax is not available. - (define_insn "*movsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y") - (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!*Y,!rm") + (match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,*Y,rm,*Y"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) { case TYPE_SSEMOV: - if (get_attr_mode (insn) == TImode) + if (get_attr_mode (insn) == MODE_TI) return "movdqa\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; case TYPE_MMXMOV: - if (get_attr_mode (insn) == DImode) + if (get_attr_mode (insn) == MODE_DI) return "movq\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; @@ -1131,40 +1129,38 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "4,5,6") + (cond [(eq_attr "alternative" "2,3,4") (const_string "mmxmov") - (eq_attr "alternative" "7,8,9") + (eq_attr "alternative" "5,6,7") (const_string "ssemov") (and (ne (symbol_ref "flag_pic") (const_int 0)) (match_operand:SI 1 "symbolic_operand" "")) (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*") - (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")]) + (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")]) ;; Stores and loads of ax to arbitary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabssi_1_rex64" - [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:SI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:SI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{l}\t{%1, %P0|%P0, %1} - mov{l}\t{%1, %a0|%a0, %1} - movabs{l}\t{%1, %a0|%a0, %1}" + mov{l}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "SI")]) (define_insn "*movabssi_2_rex64" [(set (match_operand:SI 0 "register_operand" "=a,r") (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{l}\t{%P1, %0|%0, %P1} mov{l}\t{%a1, %0|%0, %a1}" @@ -1214,14 +1210,9 @@ [(set_attr "type" "push") (set_attr "mode" "QI")]) -; The first alternative is used only to compute proper length of instruction. -; Reload's algorithm does not take into account the cost of spill instructions -; needed to free register in given class, so avoid it from choosing the first -; alternative when eax is not available. - (define_insn "*movhi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=*?a,r,r,*?a,r,m") - (match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) @@ -1238,59 +1229,57 @@ } } [(set (attr "type") - (cond [(and (eq_attr "alternative" "0,1") + (cond [(and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") (const_int 0)))) (const_string "imov") - (and (eq_attr "alternative" "2,3,4") + (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand" "")) (const_string "imov") (and (ne (symbol_ref "TARGET_MOVX") (const_int 0)) - (eq_attr "alternative" "0,1,3,4")) + (eq_attr "alternative" "0,2")) (const_string "imovx") ] (const_string "imov"))) (set (attr "mode") (cond [(eq_attr "type" "imovx") (const_string "SI") - (and (eq_attr "alternative" "2,3,4") + (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand" "")) (const_string "SI") - (and (eq_attr "alternative" "0,1") + (and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") (const_int 0)))) (const_string "SI") ] - (const_string "HI"))) - (set_attr "modrm" "0,*,*,0,*,*")]) + (const_string "HI")))]) ;; Stores and loads of ax to arbitary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabshi_1_rex64" - [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:HI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:HI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{w}\t{%1, %P0|%P0, %1} - mov{w}\t{%1, %a0|%a0, %1} - movabs{w}\t{%1, %a0|%a0, %1}" + mov{w}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "HI")]) (define_insn "*movabshi_2_rex64" [(set (match_operand:HI 0 "register_operand" "=a,r") (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{w}\t{%P1, %0|%0, %P1} mov{w}\t{%a1, %0|%0, %a1}" @@ -1488,7 +1477,7 @@ (define_expand "movstrictqi" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) (match_operand:QI 1 "general_operand" ""))] - "! TARGET_PARTIAL_REG_STALL" + "! TARGET_PARTIAL_REG_STALL || optimize_size" { /* Don't generate memory->memory moves, go through a register. */ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) @@ -1498,7 +1487,7 @@ (define_insn "*movstrictqi_1" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) (match_operand:QI 1 "general_operand" "*qn,m"))] - "! TARGET_PARTIAL_REG_STALL + "(! TARGET_PARTIAL_REG_STALL || optimize_size) && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "mov{b}\t{%1, %0|%0, %1}" [(set_attr "type" "imov") @@ -1592,24 +1581,23 @@ ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabsqi_1_rex64" - [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:QI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:QI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{b}\t{%1, %P0|%P0, %1} - mov{b}\t{%1, %a0|%a0, %1} - movabs{b}\t{%1, %a0|%a0, %1}" + mov{b}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "QI")]) (define_insn "*movabsqi_2_rex64" [(set (match_operand:QI 0 "register_operand" "=a,r") (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{b}\t{%P1, %0|%0, %P1} mov{b}\t{%a1, %0|%0, %a1}" @@ -1904,7 +1892,7 @@ [(set (attr "type") (cond [(eq_attr "alternative" "5,6") (const_string "mmxmov") - (eq_attr "alternative" "7,8") + (eq_attr "alternative" "7,8,9") (const_string "ssemov") (eq_attr "alternative" "4") (const_string "multi") @@ -1921,24 +1909,23 @@ ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabsdi_1_rex64" - [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:DI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:DI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{q}\t{%1, %P0|%P0, %1} - mov{q}\t{%1, %a0|%a0, %1} - movabs{q}\t{%1, %a0|%a0, %1}" + mov{q}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "DI")]) (define_insn "*movabsdi_2_rex64" [(set (match_operand:DI 0 "register_operand" "=a,r") (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{q}\t{%P1, %0|%0, %P1} mov{q}\t{%a1, %0|%0, %a1}" @@ -12839,9 +12826,9 @@ (set_attr "modrm" "0") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) + (const_int 128))) (const_int 2) (const_int 6)))]) @@ -12857,9 +12844,9 @@ (set_attr "modrm" "0") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) + (const_int 128))) (const_int 2) (const_int 6)))]) @@ -13124,9 +13111,9 @@ [(set_attr "type" "ibr") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) + (const_int 128))) (const_int 2) (const_int 5))) (set_attr "modrm" "0")]) @@ -13250,9 +13237,9 @@ (set (attr "length") (if_then_else (and (eq_attr "alternative" "0") (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124)))) + (const_int 128)))) (const_int 2) (const_int 16))) ;; We don't know the type before shorten branches. Optimistically expect @@ -13616,11 +13603,10 @@ "ix86_expand_epilogue (0); DONE;") (define_expand "eh_return" - [(use (match_operand 0 "register_operand" "")) - (use (match_operand 1 "register_operand" ""))] + [(use (match_operand 0 "register_operand" ""))] "" { - rtx tmp, sa = operands[0], ra = operands[1]; + rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0]; /* Tricky bit: we write the address of the handler to which we will be returning into someone else's stack frame, one word below the @@ -13682,7 +13668,7 @@ (define_expand "ffssi2" [(set (match_operand:SI 0 "nonimmediate_operand" "") - (ffs:SI (match_operand:SI 1 "general_operand" "")))] + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))] "" { rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode); @@ -14823,7 +14809,7 @@ (define_insn "cosxf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + "!TARGET_64BIT && ! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" [(set_attr "type" "fpspc") @@ -16734,7 +16720,7 @@ (define_split [(set (match_operand 0 "register_operand" "") (if_then_else (match_operator 1 "comparison_operator" - [(match_operand 4 "register_operand" "") + [(match_operand 4 "nonimmediate_operand" "") (match_operand 5 "nonimmediate_operand" "")]) (match_operand 2 "nonmemory_operand" "") (match_operand 3 "nonmemory_operand" "")))] @@ -16746,13 +16732,16 @@ (subreg:TI (match_dup 7) 0)))] { PUT_MODE (operands[1], GET_MODE (operands[0])); - if (!sse_comparison_operator (operands[1], VOIDmode)) + if (!sse_comparison_operator (operands[1], VOIDmode) + || !rtx_equal_p (operands[0], operands[4])) { rtx tmp = operands[5]; operands[5] = operands[4]; operands[4] = tmp; PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1]))); } + if (!rtx_equal_p (operands[0], operands[4])) + abort (); if (const0_operand (operands[2], GET_MODE (operands[0]))) { operands[7] = operands[3]; @@ -16853,6 +16842,10 @@ operands[2] = gen_lowpart (SImode, operands[2]); PUT_MODE (operands[3], SImode);") +; Promote the QImode tests, as i386 has encoding of the AND +; instruction with 32-bit sign-extended immediate and thus the +; instruction size is unchanged, except in the %eax case for +; which it is increased by one byte, hence the ! optimize_size. (define_split [(set (reg 17) (compare (and (match_operand 1 "aligned_operand" "") @@ -16861,12 +16854,11 @@ (set (match_operand 0 "register_operand" "") (and (match_dup 1) (match_dup 2)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ix86_match_ccmode (insn, CCNOmode) - && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - /* Ensure that the operand will remain sign extended immediate. */ - && INTVAL (operands[2]) >= 0 - && (TARGET_PROMOTE_QImode || optimize_size)))" + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode) + && ! optimize_size + && ((GET_MODE (operands[0]) == HImode && ! TARGET_FAST_PREFIX) + || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))" [(parallel [(set (reg:CCNO 17) (compare:CCNO (and:SI (match_dup 1) (match_dup 2)) (const_int 0))) @@ -16879,17 +16871,20 @@ operands[0] = gen_lowpart (SImode, operands[0]); operands[1] = gen_lowpart (SImode, operands[1]);") -; Don't promote the QImode tests, as i386 don't have encoding of -; the test instruction with 32bit sign extended immediate and thus -; the code grows. +; Don't promote the QImode tests, as i386 doesn't have encoding of +; the TEST instruction with 32-bit sign-extended immediate and thus +; the instruction size would at least double, which is not what we +; want even with ! optimize_size. (define_split [(set (reg 17) (compare (and (match_operand:HI 0 "aligned_operand" "") (match_operand:HI 1 "const_int_operand" "")) (const_int 0)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ix86_match_ccmode (insn, CCNOmode) - && GET_MODE (operands[0]) == HImode" + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode) + && ! TARGET_FAST_PREFIX + && ! optimize_size" [(set (reg:CCNO 17) (compare:CCNO (and:SI (match_dup 0) (match_dup 1)) (const_int 0)))] @@ -17848,67 +17843,92 @@ ;; Moves for SSE/MMX regs. (define_insn "movv4sf_internal" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv4si_internal" - [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv2di_internal" - [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m") - (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movdqa\t{%1, %0|%0, %1}" + "@ + pxor\t%0, %0 + movdqa\t{%1, %0|%0, %1} + movdqa\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv8qi_internal" - [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") - (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" + [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxmov") (set_attr "mode" "DI")]) (define_insn "movv4hi_internal" - [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m") - (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" + [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V4HI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxmov") (set_attr "mode" "DI")]) (define_insn "movv2si_internal" - [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V2SI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) (define_insn "movv2sf_internal" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))] - "TARGET_3DNOW" - "movq\\t{%1, %0|%0, %1}" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V2SF 1 "vector_move_operand" "C,ym,y"))] + "TARGET_3DNOW + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) (define_expand "movti" - [(set (match_operand:TI 0 "general_operand" "") - (match_operand:TI 1 "general_operand" ""))] + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] "TARGET_SSE || TARGET_64BIT" { if (TARGET_64BIT) @@ -17919,35 +17939,44 @@ }) (define_insn "movv2df_internal" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movapd\t{%1, %0|%0, %1}" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorpd\t%0, %0 + movapd\t{%1, %0|%0, %1} + movapd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V2DF")]) (define_insn "movv8hi_internal" - [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") - (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv16qi_internal" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_expand "movv2df" - [(set (match_operand:V2DF 0 "general_operand" "") - (match_operand:V2DF 1 "general_operand" ""))] + [(set (match_operand:V2DF 0 "nonimmediate_operand" "") + (match_operand:V2DF 1 "nonimmediate_operand" ""))] "TARGET_SSE2" { ix86_expand_vector_move (V2DFmode, operands); @@ -17955,8 +17984,8 @@ }) (define_expand "movv8hi" - [(set (match_operand:V8HI 0 "general_operand" "") - (match_operand:V8HI 1 "general_operand" ""))] + [(set (match_operand:V8HI 0 "nonimmediate_operand" "") + (match_operand:V8HI 1 "nonimmediate_operand" ""))] "TARGET_SSE2" { ix86_expand_vector_move (V8HImode, operands); @@ -17964,8 +17993,8 @@ }) (define_expand "movv16qi" - [(set (match_operand:V16QI 0 "general_operand" "") - (match_operand:V16QI 1 "general_operand" ""))] + [(set (match_operand:V16QI 0 "nonimmediate_operand" "") + (match_operand:V16QI 1 "nonimmediate_operand" ""))] "TARGET_SSE2" { ix86_expand_vector_move (V16QImode, operands); @@ -17973,8 +18002,8 @@ }) (define_expand "movv4sf" - [(set (match_operand:V4SF 0 "general_operand" "") - (match_operand:V4SF 1 "general_operand" ""))] + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (match_operand:V4SF 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V4SFmode, operands); @@ -17982,8 +18011,8 @@ }) (define_expand "movv4si" - [(set (match_operand:V4SI 0 "general_operand" "") - (match_operand:V4SI 1 "general_operand" ""))] + [(set (match_operand:V4SI 0 "nonimmediate_operand" "") + (match_operand:V4SI 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V4SImode, operands); @@ -17991,8 +18020,8 @@ }) (define_expand "movv2di" - [(set (match_operand:V2DI 0 "general_operand" "") - (match_operand:V2DI 1 "general_operand" ""))] + [(set (match_operand:V2DI 0 "nonimmediate_operand" "") + (match_operand:V2DI 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V2DImode, operands); @@ -18000,8 +18029,8 @@ }) (define_expand "movv2si" - [(set (match_operand:V2SI 0 "general_operand" "") - (match_operand:V2SI 1 "general_operand" ""))] + [(set (match_operand:V2SI 0 "nonimmediate_operand" "") + (match_operand:V2SI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V2SImode, operands); @@ -18009,8 +18038,8 @@ }) (define_expand "movv4hi" - [(set (match_operand:V4HI 0 "general_operand" "") - (match_operand:V4HI 1 "general_operand" ""))] + [(set (match_operand:V4HI 0 "nonimmediate_operand" "") + (match_operand:V4HI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V4HImode, operands); @@ -18018,8 +18047,8 @@ }) (define_expand "movv8qi" - [(set (match_operand:V8QI 0 "general_operand" "") - (match_operand:V8QI 1 "general_operand" ""))] + [(set (match_operand:V8QI 0 "nonimmediate_operand" "") + (match_operand:V8QI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V8QImode, operands); @@ -18027,14 +18056,97 @@ }) (define_expand "movv2sf" - [(set (match_operand:V2SF 0 "general_operand" "") - (match_operand:V2SF 1 "general_operand" ""))] + [(set (match_operand:V2SF 0 "nonimmediate_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" ""))] "TARGET_3DNOW" { ix86_expand_vector_move (V2SFmode, operands); DONE; }) +(define_insn "*pushv2df" + [(set (match_operand:V2DF 0 "push_operand" "=<") + (match_operand:V2DF 1 "register_operand" "x"))] + "TARGET_SSE" + "#") + +(define_insn "*pushv2di" + [(set (match_operand:V2DI 0 "push_operand" "=<") + (match_operand:V2DI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv8hi" + [(set (match_operand:V8HI 0 "push_operand" "=<") + (match_operand:V8HI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv16qi" + [(set (match_operand:V16QI 0 "push_operand" "=<") + (match_operand:V16QI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv4sf" + [(set (match_operand:V4SF 0 "push_operand" "=<") + (match_operand:V4SF 1 "register_operand" "x"))] + "TARGET_SSE" + "#") + +(define_insn "*pushv4si" + [(set (match_operand:V4SI 0 "push_operand" "=<") + (match_operand:V4SI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv2si" + [(set (match_operand:V2SI 0 "push_operand" "=<") + (match_operand:V2SI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv4hi" + [(set (match_operand:V4HI 0 "push_operand" "=<") + (match_operand:V4HI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv8qi" + [(set (match_operand:V8QI 0 "push_operand" "=<") + (match_operand:V8QI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv2sf" + [(set (match_operand:V2SF 0 "push_operand" "=<") + (match_operand:V2SF 1 "register_operand" "y"))] + "TARGET_3DNOW" + "#") + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "register_operand" ""))] + "!TARGET_64BIT && reload_completed + && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 3))) + (set (match_dup 2) (match_dup 1))] + "operands[2] = change_address (operands[0], GET_MODE (operands[0]), + stack_pointer_rtx); + operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "register_operand" ""))] + "TARGET_64BIT && reload_completed + && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 3))) + (set (match_dup 2) (match_dup 1))] + "operands[2] = change_address (operands[0], GET_MODE (operands[0]), + stack_pointer_rtx); + operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") + + (define_insn_and_split "*pushti" [(set (match_operand:TI 0 "push_operand" "=<") (match_operand:TI 1 "nonmemory_operand" "x"))] @@ -18158,8 +18270,9 @@ (define_insn "movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:TI 1 "general_operand" "C,xm,x"))] - "TARGET_SSE && !TARGET_64BIT" + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ xorps\t%0, %0 movaps\t{%1, %0|%0, %1} @@ -18169,7 +18282,7 @@ (define_insn "*movti_rex64" [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") - (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))] + (match_operand:TI 1 "general_operand" "riFo,riF,C,x,m"))] "TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ @@ -18191,29 +18304,56 @@ ;; These two patterns are useful for specifying exactly whether to use ;; movaps or movups -(define_insn "sse_movaps" +(define_expand "sse_movaps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] + UNSPEC_MOVA))] + "TARGET_SSE" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_movaps (tmp, operands[1])); + emit_move_insn (operands[0], tmp); + DONE; + } +}) + +(define_insn "*sse_movaps_1" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVA))] - "TARGET_SSE" - "@ - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" + "TARGET_SSE + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov,ssemov") (set_attr "mode" "V4SF")]) -(define_insn "sse_movups" +(define_expand "sse_movups" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] + UNSPEC_MOVU))] + "TARGET_SSE" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_movups (tmp, operands[1])); + emit_move_insn (operands[0], tmp); + DONE; + } +}) + +(define_insn "*sse_movups_1" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] - "TARGET_SSE" - "@ - movups\t{%1, %0|%0, %1} - movups\t{%1, %0|%0, %1}" + "TARGET_SSE + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movups\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt,ssecvt") (set_attr "mode" "V4SF")]) - ;; SSE Strange Moves. (define_insn "sse_movmskps" @@ -18329,11 +18469,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V4SF")]) -(define_insn "sse_loadss" +(define_expand "sse_loadss" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "memory_operand" "")] + "TARGET_SSE" +{ + emit_insn (gen_sse_loadss_1 (operands[0], operands[1], + CONST0_RTX (V4SFmode))); + DONE; +}) + +(define_insn "sse_loadss_1" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (match_operand:V4SF 1 "memory_operand" "m") - (vec_duplicate:V4SF (float:SF (const_int 0))) + (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m")) + (match_operand:V4SF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE" "movss\t{%1, %0|%0, %1}" @@ -18804,7 +18954,7 @@ (define_insn "sse2_nandv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")) + (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0")) (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" @@ -18908,7 +19058,7 @@ (match_operator:V4SI 3 "sse_comparison_operator" [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "register_operand" "x")]) - (match_dup 1) + (subreg:V4SI (match_dup 1) 0) (const_int 1)))] "TARGET_SSE" "cmp%D3ss\t{%2, %0|%0, %2}" @@ -19093,6 +19243,19 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvtsi2ssq" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0,0") + (vec_duplicate:V4SF + (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 14)))] + "TARGET_SSE && TARGET_64BIT" + "cvtsi2ssq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,vector") + (set_attr "mode" "SF")]) + (define_insn "cvtss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI @@ -19103,6 +19266,17 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvtss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) + (parallel [(const_int 0)])))] + "TARGET_SSE" + "cvtss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,vector") + (set_attr "mode" "SF")]) + (define_insn "cvttss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI @@ -19114,6 +19288,18 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvttss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] + UNSPEC_FIX) + (parallel [(const_int 0)])))] + "TARGET_SSE && TARGET_64BIT" + "cvttss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "vector,vector")]) + ;; MMX insns @@ -19121,7 +19307,7 @@ (define_insn "addv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddb\t{%2, %0|%0, %2}" @@ -19130,7 +19316,7 @@ (define_insn "addv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddw\t{%2, %0|%0, %2}" @@ -19139,16 +19325,27 @@ (define_insn "addv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI (match_operand:V2SI 1 "register_operand" "0") + (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0") (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddd\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_insn "mmx_adddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(plus:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "paddq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + (define_insn "ssaddv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddsb\t{%2, %0|%0, %2}" @@ -19157,7 +19354,7 @@ (define_insn "ssaddv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddsw\t{%2, %0|%0, %2}" @@ -19166,7 +19363,7 @@ (define_insn "usaddv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddusb\t{%2, %0|%0, %2}" @@ -19175,7 +19372,7 @@ (define_insn "usaddv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddusw\t{%2, %0|%0, %2}" @@ -19209,6 +19406,17 @@ [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_insn "mmx_subdi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(minus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "psubq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + (define_insn "sssubv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") @@ -19312,7 +19520,7 @@ (define_insn "mmx_iordi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(ior:DI (match_operand:DI 1 "register_operand" "0") + [(ior:DI (match_operand:DI 1 "register_operand" "%0") (match_operand:DI 2 "nonimmediate_operand" "ym"))] UNSPEC_NOP))] "TARGET_MMX" @@ -19323,7 +19531,7 @@ (define_insn "mmx_xordi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(xor:DI (match_operand:DI 1 "register_operand" "0") + [(xor:DI (match_operand:DI 1 "register_operand" "%0") (match_operand:DI 2 "nonimmediate_operand" "ym"))] UNSPEC_NOP))] "TARGET_MMX" @@ -19346,7 +19554,7 @@ (define_insn "mmx_anddi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(and:DI (match_operand:DI 1 "register_operand" "0") + [(and:DI (match_operand:DI 1 "register_operand" "%0") (match_operand:DI 2 "nonimmediate_operand" "ym"))] UNSPEC_NOP))] "TARGET_MMX" @@ -19805,17 +20013,17 @@ (define_insn "ldmxcsr" [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] UNSPECV_LDMXCSR)] - "TARGET_MMX" + "TARGET_SSE" "ldmxcsr\t%0" - [(set_attr "type" "mmx") + [(set_attr "type" "sse") (set_attr "memory" "load")]) (define_insn "stmxcsr" [(set (match_operand:SI 0 "memory_operand" "=m") (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] - "TARGET_MMX" + "TARGET_SSE" "stmxcsr\t%0" - [(set_attr "type" "mmx") + [(set_attr "type" "sse") (set_attr "memory" "store")]) (define_expand "sfence" @@ -20471,7 +20679,7 @@ (match_operator:V2DI 3 "sse_comparison_operator" [(match_operand:V2DF 1 "register_operand" "0") (match_operand:V2DF 2 "nonimmediate_operand" "x")]) - (match_dup 1) + (subreg:V2DI (match_dup 1) 0) (const_int 1)))] "TARGET_SSE2" "cmp%D3sd\t{%2, %0|%0, %2}" @@ -20692,6 +20900,15 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SI")]) +(define_insn "cvtsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r") + (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SI")]) + (define_insn "cvttsd2si" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") @@ -20701,6 +20918,16 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SI")]) +(define_insn "cvttsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm") + (parallel [(const_int 0)]))] UNSPEC_FIX))] + "TARGET_SSE2 && TARGET_64BIT" + "cvttsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector,vector")]) + (define_insn "cvtsi2sd" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") @@ -20713,6 +20940,19 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) +(define_insn "cvtsi2sdq" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") + (vec_duplicate:V2DF + (float:DF + (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 2)))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsi2sdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "vector,direct")]) + ;; Conversions between SF and DF (define_insn "cvtsd2ss" @@ -20770,7 +21010,7 @@ (define_insn "addv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x") - (plus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddb\t{%2, %0|%0, %2}" @@ -20779,7 +21019,7 @@ (define_insn "addv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") - (plus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddw\t{%2, %0|%0, %2}" @@ -20788,7 +21028,7 @@ (define_insn "addv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI (match_operand:V4SI 1 "register_operand" "0") + (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0") (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddd\t{%2, %0|%0, %2}" @@ -20797,7 +21037,7 @@ (define_insn "addv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI (match_operand:V2DI 1 "register_operand" "0") + (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0") (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddq\t{%2, %0|%0, %2}" @@ -20806,7 +21046,7 @@ (define_insn "ssaddv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x") - (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddsb\t{%2, %0|%0, %2}" @@ -20815,7 +21055,7 @@ (define_insn "ssaddv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") - (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddsw\t{%2, %0|%0, %2}" @@ -20824,7 +21064,7 @@ (define_insn "usaddv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x") - (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddusb\t{%2, %0|%0, %2}" @@ -20833,7 +21073,7 @@ (define_insn "usaddv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") - (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddusw\t{%2, %0|%0, %2}" @@ -21069,7 +21309,8 @@ [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0") (vec_duplicate:V8HI - (match_operand:SI 2 "nonimmediate_operand" "rm")) + (truncate:HI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) (match_operand:SI 3 "immediate_operand" "i")))] "TARGET_SSE2" "pinsrw\t{%3, %2, %0|%0, %2, %3}" @@ -21218,7 +21459,7 @@ (define_insn "ashrv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psraw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21227,7 +21468,7 @@ (define_insn "ashrv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrad\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21236,7 +21477,7 @@ (define_insn "lshrv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrlw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21245,7 +21486,7 @@ (define_insn "lshrv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrld\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21254,7 +21495,7 @@ (define_insn "lshrv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrlq\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21263,7 +21504,7 @@ (define_insn "ashlv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psllw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21272,7 +21513,7 @@ (define_insn "ashlv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "pslld\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21281,7 +21522,7 @@ (define_insn "ashlv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psllq\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21595,45 +21836,41 @@ (define_insn "sse2_movapd" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")] + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVA))] - "TARGET_SSE2" - "@ - movapd\t{%1, %0|%0, %1} - movapd\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movapd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V2DF")]) (define_insn "sse2_movupd" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")] + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] - "TARGET_SSE2" - "@ - movupd\t{%1, %0|%0, %1} - movupd\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movupd\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) (define_insn "sse2_movdqa" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")] + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVA))] - "TARGET_SSE2" - "@ - movdqa\t{%1, %0|%0, %1} - movdqa\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movdqa\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "TI")]) (define_insn "sse2_movdqu" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")] + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] - "TARGET_SSE2" - "@ - movdqu\t{%1, %0|%0, %1} - movdqu\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movdqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) @@ -21641,24 +21878,48 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y") (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x") (parallel [(const_int 0)])))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_64BIT" "@ movq\t{%1, %0|%0, %1} movdq2q\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) +(define_insn "sse2_movdq2q_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r") + (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + (define_insn "sse2_movq2dq" [(set (match_operand:V2DI 0 "register_operand" "=x,?x") (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y") (const_int 0)))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_64BIT" "@ movq\t{%1, %0|%0, %1} movq2dq\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt,ssemov") (set_attr "mode" "TI")]) +(define_insn "sse2_movq2dq_rex64" + [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x") + (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r") + (const_int 0)))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt,ssemov,ssecvt") + (set_attr "mode" "TI")]) + (define_insn "sse2_movq" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_concat:V2DI (vec_select:DI @@ -21673,7 +21934,7 @@ (define_insn "sse2_loadd" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_merge:V4SI - (vec_duplicate:V4HI (match_operand:SI 1 "nonimmediate_operand" "mr")) + (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr")) (const_vector:V4SI [(const_int 0) (const_int 0) (const_int 0) @@ -21716,11 +21977,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) -(define_insn "sse2_loadsd" +(define_expand "sse2_loadsd" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1], + CONST0_RTX (V2DFmode))); + DONE; +}) + +(define_insn "sse2_loadsd_1" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_merge:V2DF - (match_operand:DF 1 "memory_operand" "m") - (vec_duplicate:DF (float:DF (const_int 0))) + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")) + (match_operand:V2DF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE2" "movsd\t{%1, %0|%0, %1}" |