diff options
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- | gcc/config/i386/i386.md | 1563 |
1 files changed, 992 insertions, 571 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index befbfe49569..50328260450 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1,5 +1,6 @@ ;; GCC machine description for IA-32 and x86-64. -;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002 +;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +;; 2001, 2002, 2003 ;; Free Software Foundation, Inc. ;; Mostly by William Schelter. ;; x86_64 support added by Jan Hubicka @@ -109,6 +110,13 @@ (UNSPEC_MFENCE 59) (UNSPEC_LFENCE 60) (UNSPEC_PSADBW 61) + (UNSPEC_ADDSUB 71) + (UNSPEC_HADD 72) + (UNSPEC_HSUB 73) + (UNSPEC_MOVSHDUP 74) + (UNSPEC_MOVSLDUP 75) + (UNSPEC_LDQQU 76) + (UNSPEC_MOVDDUP 77) ]) (define_constants @@ -119,6 +127,8 @@ (UNSPECV_STMXCSR 40) (UNSPECV_FEMMS 46) (UNSPECV_CLFLUSH 57) + (UNSPECV_MONITOR 69) + (UNSPECV_MWAIT 70) ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls @@ -267,6 +277,8 @@ (define_attr "length" "" (cond [(eq_attr "type" "other,multi,fistp") (const_int 16) + (eq_attr "type" "fcmp") + (const_int 4) (eq_attr "unit" "i387") (plus (const_int 2) (plus (attr "prefix_data16") @@ -680,7 +692,7 @@ [(set (reg:CC 17) (compare:CC (match_operand:XF 0 "cmp_fp_expander_operand" "") (match_operand:XF 1 "cmp_fp_expander_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" { ix86_compare_op0 = operands[0]; ix86_compare_op1 = operands[1]; @@ -808,7 +820,7 @@ (compare:CCFP (match_operand:XF 0 "register_operand" "f") (match_operand:XF 1 "register_operand" "f")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "* return output_fp_compare (insn, operands, 0, 0);" [(set_attr "type" "fcmp") (set_attr "mode" "XF")]) @@ -830,7 +842,7 @@ (match_operand:XF 1 "register_operand" "f") (match_operand:XF 2 "register_operand" "f"))] UNSPEC_FNSTSW))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "* return output_fp_compare (insn, operands, 2, 0);" [(set_attr "type" "multi") (set_attr "mode" "XF")]) @@ -1099,25 +1111,20 @@ (set_attr "mode" "SI") (set_attr "length_immediate" "1")]) -; The first alternative is used only to compute proper length of instruction. -; Reload's algorithm does not take into account the cost of spill instructions -; needed to free register in given class, so avoid it from choosing the first -; alternative when eax is not available. - (define_insn "*movsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y") - (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!*Y,!rm") + (match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,*Y,rm,*Y"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) { case TYPE_SSEMOV: - if (get_attr_mode (insn) == TImode) + if (get_attr_mode (insn) == MODE_TI) return "movdqa\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; case TYPE_MMXMOV: - if (get_attr_mode (insn) == DImode) + if (get_attr_mode (insn) == MODE_DI) return "movq\t{%1, %0|%0, %1}"; return "movd\t{%1, %0|%0, %1}"; @@ -1131,40 +1138,38 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "4,5,6") + (cond [(eq_attr "alternative" "2,3,4") (const_string "mmxmov") - (eq_attr "alternative" "7,8,9") + (eq_attr "alternative" "5,6,7") (const_string "ssemov") (and (ne (symbol_ref "flag_pic") (const_int 0)) (match_operand:SI 1 "symbolic_operand" "")) (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*") - (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")]) + (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")]) ;; Stores and loads of ax to arbitary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabssi_1_rex64" - [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:SI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:SI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{l}\t{%1, %P0|%P0, %1} - mov{l}\t{%1, %a0|%a0, %1} - movabs{l}\t{%1, %a0|%a0, %1}" + mov{l}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "SI")]) (define_insn "*movabssi_2_rex64" [(set (match_operand:SI 0 "register_operand" "=a,r") (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{l}\t{%P1, %0|%0, %P1} mov{l}\t{%a1, %0|%0, %a1}" @@ -1183,10 +1188,9 @@ "" "xchg{l}\t%1, %0" [(set_attr "type" "imov") + (set_attr "mode" "SI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "mode" "SI") - (set_attr "modrm" "0") (set_attr "ppro_uops" "few")]) (define_expand "movhi" @@ -1214,14 +1218,9 @@ [(set_attr "type" "push") (set_attr "mode" "QI")]) -; The first alternative is used only to compute proper length of instruction. -; Reload's algorithm does not take into account the cost of spill instructions -; needed to free register in given class, so avoid it from choosing the first -; alternative when eax is not available. - (define_insn "*movhi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=*?a,r,r,*?a,r,m") - (match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))] + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) @@ -1238,59 +1237,57 @@ } } [(set (attr "type") - (cond [(and (eq_attr "alternative" "0,1") + (cond [(and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") (const_int 0)))) (const_string "imov") - (and (eq_attr "alternative" "2,3,4") + (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand" "")) (const_string "imov") (and (ne (symbol_ref "TARGET_MOVX") (const_int 0)) - (eq_attr "alternative" "0,1,3,4")) + (eq_attr "alternative" "0,2")) (const_string "imovx") ] (const_string "imov"))) (set (attr "mode") (cond [(eq_attr "type" "imovx") (const_string "SI") - (and (eq_attr "alternative" "2,3,4") + (and (eq_attr "alternative" "1,2") (match_operand:HI 1 "aligned_operand" "")) (const_string "SI") - (and (eq_attr "alternative" "0,1") + (and (eq_attr "alternative" "0") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (const_int 0)) (eq (symbol_ref "TARGET_HIMODE_MATH") (const_int 0)))) (const_string "SI") ] - (const_string "HI"))) - (set_attr "modrm" "0,*,*,0,*,*")]) + (const_string "HI")))]) ;; Stores and loads of ax to arbitary constant address. ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabshi_1_rex64" - [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:HI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:HI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{w}\t{%1, %P0|%P0, %1} - mov{w}\t{%1, %a0|%a0, %1} - movabs{w}\t{%1, %a0|%a0, %1}" + mov{w}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "HI")]) (define_insn "*movabshi_2_rex64" [(set (match_operand:HI 0 "register_operand" "=a,r") (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{w}\t{%P1, %0|%0, %P1} mov{w}\t{%a1, %0|%0, %a1}" @@ -1306,12 +1303,12 @@ (match_operand:HI 1 "register_operand" "+r")) (set (match_dup 1) (match_dup 0))] - "TARGET_PARTIAL_REG_STALL" - "xchg{w}\t%1, %0" + "!TARGET_PARTIAL_REG_STALL || optimize_size" + "xchg{l}\t%k1, %k0" [(set_attr "type" "imov") + (set_attr "mode" "SI") (set_attr "pent_pair" "np") - (set_attr "mode" "HI") - (set_attr "modrm" "0") + (set_attr "athlon_decode" "vector") (set_attr "ppro_uops" "few")]) (define_insn "*swaphi_2" @@ -1319,12 +1316,12 @@ (match_operand:HI 1 "register_operand" "+r")) (set (match_dup 1) (match_dup 0))] - "! TARGET_PARTIAL_REG_STALL" - "xchg{l}\t%k1, %k0" + "TARGET_PARTIAL_REG_STALL" + "xchg{w}\t%1, %0" [(set_attr "type" "imov") + (set_attr "mode" "HI") (set_attr "pent_pair" "np") - (set_attr "mode" "SI") - (set_attr "modrm" "0") + (set_attr "athlon_decode" "vector") (set_attr "ppro_uops" "few")]) (define_expand "movstricthi" @@ -1472,23 +1469,36 @@ DONE; }) -(define_insn "*swapqi" +(define_insn "*swapqi_1" [(set (match_operand:QI 0 "register_operand" "+r") (match_operand:QI 1 "register_operand" "+r")) (set (match_dup 1) (match_dup 0))] - "" - "xchg{b}\t%1, %0" + "!TARGET_PARTIAL_REG_STALL || optimize_size" + "xchg{l}\t%k1, %k0" [(set_attr "type" "imov") + (set_attr "mode" "SI") (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "ppro_uops" "few")]) + +(define_insn "*swapqi_2" + [(set (match_operand:QI 0 "register_operand" "+q") + (match_operand:QI 1 "register_operand" "+q")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_PARTIAL_REG_STALL" + "xchg{b}\t%1, %0" + [(set_attr "type" "imov") (set_attr "mode" "QI") - (set_attr "modrm" "0") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") (set_attr "ppro_uops" "few")]) (define_expand "movstrictqi" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) (match_operand:QI 1 "general_operand" ""))] - "! TARGET_PARTIAL_REG_STALL" + "! TARGET_PARTIAL_REG_STALL || optimize_size" { /* Don't generate memory->memory moves, go through a register. */ if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) @@ -1498,7 +1508,7 @@ (define_insn "*movstrictqi_1" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) (match_operand:QI 1 "general_operand" "*qn,m"))] - "! TARGET_PARTIAL_REG_STALL + "(! TARGET_PARTIAL_REG_STALL || optimize_size) && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "mov{b}\t{%1, %0|%0, %1}" [(set_attr "type" "imov") @@ -1592,24 +1602,23 @@ ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabsqi_1_rex64" - [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:QI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:QI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{b}\t{%1, %P0|%P0, %1} - mov{b}\t{%1, %a0|%a0, %1} - movabs{b}\t{%1, %a0|%a0, %1}" + mov{b}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "QI")]) (define_insn "*movabsqi_2_rex64" [(set (match_operand:QI 0 "register_operand" "=a,r") (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{b}\t{%P1, %0|%0, %P1} mov{b}\t{%a1, %0|%0, %a1}" @@ -1693,11 +1702,11 @@ [(set_attr "type" "imov") (set_attr "mode" "QI")]) -(define_insn "*movsi_insv_1_rex64" - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") +(define_insn "movdi_insv_1_rex64" + [(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q") (const_int 8) (const_int 8)) - (match_operand:SI 1 "nonmemory_operand" "Qn"))] + (match_operand:DI 1 "nonmemory_operand" "Qn"))] "TARGET_64BIT" "mov{b}\t{%b1, %h0|%h0, %b1}" [(set_attr "type" "imov") @@ -1904,7 +1913,7 @@ [(set (attr "type") (cond [(eq_attr "alternative" "5,6") (const_string "mmxmov") - (eq_attr "alternative" "7,8") + (eq_attr "alternative" "7,8,9") (const_string "ssemov") (eq_attr "alternative" "4") (const_string "multi") @@ -1921,24 +1930,23 @@ ;; We fake an second form of instruction to force reload to load address ;; into register when rax is not available (define_insn "*movabsdi_1_rex64" - [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r,r")) - (match_operand:DI 1 "nonmemory_operand" "a,er,i"))] - "TARGET_64BIT" + [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:DI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" "@ movabs{q}\t{%1, %P0|%P0, %1} - mov{q}\t{%1, %a0|%a0, %1} - movabs{q}\t{%1, %a0|%a0, %1}" + mov{q}\t{%1, %a0|%a0, %1}" [(set_attr "type" "imov") - (set_attr "modrm" "0,*,*") - (set_attr "length_address" "8,0,0") - (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") (set_attr "memory" "store") (set_attr "mode" "DI")]) (define_insn "*movabsdi_2_rex64" [(set (match_operand:DI 0 "register_operand" "=a,r") (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] - "TARGET_64BIT" + "TARGET_64BIT && ix86_check_movabs (insn, 1)" "@ movabs{q}\t{%P1, %0|%0, %P1} mov{q}\t{%a1, %0|%0, %a1}" @@ -1991,13 +1999,11 @@ "TARGET_64BIT" "xchg{q}\t%1, %0" [(set_attr "type" "imov") + (set_attr "mode" "DI") (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") - (set_attr "mode" "DI") - (set_attr "modrm" "0") (set_attr "ppro_uops" "few")]) - (define_expand "movsf" [(set (match_operand:SF 0 "nonimmediate_operand" "") (match_operand:SF 1 "general_operand" ""))] @@ -2430,7 +2436,7 @@ (define_expand "movxf" [(set (match_operand:XF 0 "nonimmediate_operand" "") (match_operand:XF 1 "general_operand" ""))] - "!TARGET_64BIT" + "!TARGET_128BIT_LONG_DOUBLE" "ix86_expand_move (XFmode, operands); DONE;") (define_expand "movtf" @@ -2449,7 +2455,7 @@ (define_insn "*pushxf_nointeger" [(set (match_operand:XF 0 "push_operand" "=X,X,X") (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] - "!TARGET_64BIT && optimize_size" + "!TARGET_128BIT_LONG_DOUBLE && optimize_size" { switch (which_alternative) { @@ -2505,7 +2511,7 @@ (define_insn "*pushxf_integer" [(set (match_operand:XF 0 "push_operand" "=<,<") (match_operand:XF 1 "general_no_elim_operand" "f#r,ro#f"))] - "!TARGET_64BIT && !optimize_size" + "!TARGET_128BIT_LONG_DOUBLE && !optimize_size" { switch (which_alternative) { @@ -2576,7 +2582,7 @@ (define_split [(set (match_operand:XF 0 "push_operand" "") (match_operand:XF 1 "any_fp_register_operand" ""))] - "!TARGET_64BIT" + "!TARGET_128BIT_LONG_DOUBLE" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) (set (mem:XF (reg:SI 7)) (match_dup 1))]) @@ -2598,7 +2604,7 @@ (define_insn "*movxf_nointeger" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] - "!TARGET_64BIT + "!TARGET_128BIT_LONG_DOUBLE && optimize_size && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed @@ -2692,7 +2698,7 @@ (define_insn "*movxf_integer" [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o") (match_operand:XF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))] - "!TARGET_64BIT + "!TARGET_128BIT_LONG_DOUBLE && !optimize_size && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed @@ -3363,7 +3369,7 @@ (define_split [(set (match_operand:XF 0 "push_operand" "") (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))] - "!TARGET_64BIT" + "!TARGET_128BIT_LONG_DOUBLE" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) (set (mem:XF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) @@ -3396,7 +3402,7 @@ (define_split [(set (match_operand:XF 0 "push_operand" "") (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))] - "!TARGET_64BIT" + "!TARGET_128BIT_LONG_DOUBLE" [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -12))) (set (mem:DF (reg:SI 7)) (float_extend:XF (match_dup 1)))]) @@ -3478,7 +3484,7 @@ (define_expand "extendsfxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") (float_extend:XF (match_operand:SF 1 "general_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" { /* ??? Needed for compress_float_constant since all fp constants are LEGITIMATE_CONSTANT_P. */ @@ -3491,7 +3497,7 @@ (define_insn "*extendsfxf2_1" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] - "!TARGET_64BIT && TARGET_80387 + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (which_alternative) @@ -3568,7 +3574,7 @@ (define_expand "extenddfxf2" [(set (match_operand:XF 0 "nonimmediate_operand" "") (float_extend:XF (match_operand:DF 1 "general_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" { /* ??? Needed for compress_float_constant since all fp constants are LEGITIMATE_CONSTANT_P. */ @@ -3581,7 +3587,7 @@ (define_insn "*extenddfxf2_1" [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m") (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))] - "!TARGET_64BIT && TARGET_80387 + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (which_alternative) @@ -3801,7 +3807,7 @@ (float_truncate:SF (match_operand:XF 1 "register_operand" ""))) (clobber (match_dup 2))])] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "operands[2] = assign_386_stack_local (SFmode, 0);") (define_insn "*truncxfsf2_1" @@ -3809,7 +3815,7 @@ (float_truncate:SF (match_operand:XF 1 "register_operand" "f,f,f,f"))) (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" { switch (which_alternative) { @@ -3829,7 +3835,7 @@ [(set (match_operand:SF 0 "memory_operand" "=m") (float_truncate:SF (match_operand:XF 1 "register_operand" "f")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" { if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) return "fstp%z0\t%y0"; @@ -3926,7 +3932,7 @@ (float_truncate:DF (match_operand:XF 1 "register_operand" ""))) (clobber (match_dup 2))])] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "operands[2] = assign_386_stack_local (DFmode, 0);") (define_insn "*truncxfdf2_1" @@ -3934,7 +3940,7 @@ (float_truncate:DF (match_operand:XF 1 "register_operand" "f,f,f,f"))) (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" { switch (which_alternative) { @@ -3955,7 +3961,7 @@ [(set (match_operand:DF 0 "memory_operand" "=m") (float_truncate:DF (match_operand:XF 1 "register_operand" "f")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" { if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) return "fstp%z0\t%y0"; @@ -4055,7 +4061,7 @@ (define_expand "fix_truncxfdi2" [(set (match_operand:DI 0 "nonimmediate_operand" "") (fix:DI (match_operand:XF 1 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "") (define_expand "fix_trunctfdi2" @@ -4194,7 +4200,7 @@ (define_expand "fix_truncxfsi2" [(set (match_operand:SI 0 "nonimmediate_operand" "") (fix:SI (match_operand:XF 1 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "") (define_expand "fix_trunctfsi2" @@ -4327,7 +4333,7 @@ (define_expand "fix_truncxfhi2" [(set (match_operand:HI 0 "nonimmediate_operand" "") (fix:HI (match_operand:XF 1 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "") (define_expand "fix_trunctfhi2" @@ -4605,7 +4611,7 @@ (define_insn "floathixf2" [(set (match_operand:XF 0 "register_operand" "=f,f") (float:XF (match_operand:HI 1 "nonimmediate_operand" "m,r")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "@ fild%z1\t%1 #" @@ -4627,7 +4633,7 @@ (define_insn "floatsixf2" [(set (match_operand:XF 0 "register_operand" "=f,f") (float:XF (match_operand:SI 1 "nonimmediate_operand" "m,r")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "@ fild%z1\t%1 #" @@ -4649,7 +4655,7 @@ (define_insn "floatdixf2" [(set (match_operand:XF 0 "register_operand" "=f,f") (float:XF (match_operand:DI 1 "nonimmediate_operand" "m,r")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "@ fild%z1\t%1 #" @@ -6315,7 +6321,7 @@ [(set (match_operand:XF 0 "register_operand" "") (plus:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "") (define_expand "addtf3" @@ -6643,7 +6649,7 @@ [(set (match_operand:XF 0 "register_operand" "") (minus:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "") (define_expand "subtf3" @@ -7102,7 +7108,7 @@ [(set (match_operand:XF 0 "register_operand" "") (mult:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "") (define_expand "multf3" @@ -7156,7 +7162,7 @@ [(set (match_operand:XF 0 "register_operand" "") (div:XF (match_operand:XF 1 "register_operand" "") (match_operand:XF 2 "register_operand" "")))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "") (define_expand "divtf3" @@ -7563,17 +7569,21 @@ "" "") -(define_insn "*testqi_1" +(define_insn "*testqi_1_maybe_si" [(set (reg 17) - (compare (and:QI (match_operand:QI 0 "nonimmediate_operand" "%*a,q,qm,r") - (match_operand:QI 1 "nonmemory_operand" "n,n,qn,n")) - (const_int 0)))] - "ix86_match_ccmode (insn, CCNOmode)" + (compare + (and:QI + (match_operand:QI 0 "nonimmediate_operand" "%*a,q,qm,r") + (match_operand:QI 1 "nonmemory_operand" "n,n,qn,n")) + (const_int 0)))] + "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) + && ix86_match_ccmode (insn, + GET_CODE (operands[1]) == CONST_INT + && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)" { if (which_alternative == 3) { - if (GET_CODE (operands[1]) == CONST_INT - && (INTVAL (operands[1]) & 0xffffff00)) + if (GET_CODE (operands[1]) == CONST_INT && INTVAL (operands[1]) < 0) operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); return "test{l}\t{%1, %k0|%k0, %1}"; } @@ -7584,6 +7594,18 @@ (set_attr "mode" "QI,QI,QI,SI") (set_attr "pent_pair" "uv,np,uv,np")]) +(define_insn "*testqi_1" + [(set (reg 17) + (compare (and:QI (match_operand:QI 0 "nonimmediate_operand" "%*a,q,qm") + (match_operand:QI 1 "nonmemory_operand" "n,n,qn")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "QI") + (set_attr "pent_pair" "uv,np,uv")]) + (define_expand "testqi_ext_ccno_0" [(set (reg:CCNO 17) (compare:CCNO @@ -7701,51 +7723,53 @@ "#") (define_split - [(set (reg 17) - (compare (zero_extract - (match_operand 0 "nonimmediate_operand" "") - (match_operand 1 "const_int_operand" "") - (match_operand 2 "const_int_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(zero_extract + (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_int 0)]))] "ix86_match_ccmode (insn, CCNOmode)" - [(set (reg:CCNO 17) (compare:CCNO (match_dup 3) (const_int 0)))] + [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))] { - HOST_WIDE_INT len = INTVAL (operands[1]); - HOST_WIDE_INT pos = INTVAL (operands[2]); + rtx val = operands[2]; + HOST_WIDE_INT len = INTVAL (operands[3]); + HOST_WIDE_INT pos = INTVAL (operands[4]); HOST_WIDE_INT mask; enum machine_mode mode, submode; - mode = GET_MODE (operands[0]); - if (GET_CODE (operands[0]) == MEM) + mode = GET_MODE (val); + if (GET_CODE (val) == MEM) { /* ??? Combine likes to put non-volatile mem extractions in QImode no matter the size of the test. So find a mode that works. */ - if (! MEM_VOLATILE_P (operands[0])) + if (! MEM_VOLATILE_P (val)) { mode = smallest_mode_for_size (pos + len, MODE_INT); - operands[0] = adjust_address (operands[0], mode, 0); + val = adjust_address (val, mode, 0); } } - else if (GET_CODE (operands[0]) == SUBREG - && (submode = GET_MODE (SUBREG_REG (operands[0])), + else if (GET_CODE (val) == SUBREG + && (submode = GET_MODE (SUBREG_REG (val)), GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)) && pos + len <= GET_MODE_BITSIZE (submode)) { /* Narrow a paradoxical subreg to prevent partial register stalls. */ mode = submode; - operands[0] = SUBREG_REG (operands[0]); + val = SUBREG_REG (val); } else if (mode == HImode && pos + len <= 8) { /* Small HImode tests can be converted to QImode. */ mode = QImode; - operands[0] = gen_lowpart (QImode, operands[0]); + val = gen_lowpart (QImode, val); } mask = ((HOST_WIDE_INT)1 << (pos + len)) - 1; mask &= ~(((HOST_WIDE_INT)1 << pos) - 1); - operands[3] = gen_rtx_AND (mode, operands[0], gen_int_mode (mask, mode)); + operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode)); }) ;; Convert HImode/SImode test instructions with immediate to QImode ones. @@ -7754,46 +7778,44 @@ ;; Do the converison only post-reload to avoid limiting of the register class ;; to QI regs. (define_split - [(set (reg 17) - (compare - (and (match_operand 0 "register_operand" "") - (match_operand 1 "const_int_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand 2 "register_operand" "") + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] "reload_completed - && QI_REG_P (operands[0]) + && QI_REG_P (operands[2]) + && GET_MODE (operands[2]) != QImode && ((ix86_match_ccmode (insn, CCZmode) - && !(INTVAL (operands[1]) & ~(255 << 8))) + && !(INTVAL (operands[3]) & ~(255 << 8))) || (ix86_match_ccmode (insn, CCNOmode) - && !(INTVAL (operands[1]) & ~(127 << 8)))) - && GET_MODE (operands[0]) != QImode" - [(set (reg:CCNO 17) - (compare:CCNO - (and:SI (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) - (match_dup 1)) - (const_int 0)))] - "operands[0] = gen_lowpart (SImode, operands[0]); - operands[1] = gen_int_mode (INTVAL (operands[1]) >> 8, SImode);") + && !(INTVAL (operands[3]) & ~(127 << 8))))" + [(set (match_dup 0) + (match_op_dup 1 + [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8)) + (match_dup 3)) + (const_int 0)]))] + "operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode);") (define_split - [(set (reg 17) - (compare - (and (match_operand 0 "nonimmediate_operand" "") - (match_operand 1 "const_int_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] "reload_completed - && (!REG_P (operands[0]) || ANY_QI_REG_P (operands[0])) + && GET_MODE (operands[2]) != QImode + && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2])) && ((ix86_match_ccmode (insn, CCZmode) - && !(INTVAL (operands[1]) & ~255)) + && !(INTVAL (operands[3]) & ~255)) || (ix86_match_ccmode (insn, CCNOmode) - && !(INTVAL (operands[1]) & ~127))) - && GET_MODE (operands[0]) != QImode" - [(set (reg:CCNO 17) - (compare:CCNO - (and:QI (match_dup 0) - (match_dup 1)) - (const_int 0)))] - "operands[0] = gen_lowpart (QImode, operands[0]); - operands[1] = gen_lowpart (QImode, operands[1]);") + && !(INTVAL (operands[3]) & ~127)))" + [(set (match_dup 0) + (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) + (const_int 0)]))] + "operands[2] = gen_lowpart (QImode, operands[2]); + operands[3] = gen_lowpart (QImode, operands[3]);") ;; %%% This used to optimize known byte-wide and operations to memory, @@ -8070,7 +8092,7 @@ [(set_attr "type" "alu1") (set_attr "mode" "QI")]) -(define_insn "*andqi_2" +(define_insn "*andqi_2_maybe_si" [(set (reg 17) (compare (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") @@ -8078,13 +8100,14 @@ (const_int 0))) (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r") (and:QI (match_dup 1) (match_dup 2)))] - "ix86_match_ccmode (insn, CCNOmode) - && ix86_binary_operator_ok (AND, QImode, operands)" + "ix86_binary_operator_ok (AND, QImode, operands) + && ix86_match_ccmode (insn, + GET_CODE (operands[2]) == CONST_INT + && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)" { if (which_alternative == 2) { - if (GET_CODE (operands[2]) == CONST_INT - && (INTVAL (operands[2]) & 0xffffff00)) + if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) < 0) operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff); return "and{l}\t{%2, %k0|%k0, %2}"; } @@ -8093,6 +8116,20 @@ [(set_attr "type" "alu") (set_attr "mode" "QI,QI,SI")]) +(define_insn "*andqi_2" + [(set (reg 17) + (compare (and:QI + (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qim,qi")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (and:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, QImode, operands)" + "and{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + (define_insn "*andqi_2_slp" [(set (reg 17) (compare (and:QI @@ -9550,7 +9587,7 @@ [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "") (neg:XF (match_operand:XF 1 "nonimmediate_operand" ""))) (clobber (reg:CC 17))])] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "ix86_expand_unary_operator (NEG, XFmode, operands); DONE;") (define_expand "negtf2" @@ -9567,7 +9604,7 @@ [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f") (neg:XF (match_operand:XF 1 "nonimmediate_operand" "0,0"))) (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && ix86_unary_operator_ok (NEG, XFmode, operands)" "#") @@ -9655,7 +9692,7 @@ (define_insn "*negxf2_1" [(set (match_operand:XF 0 "register_operand" "=f") (neg:XF (match_operand:XF 1 "register_operand" "0")))] - "!TARGET_64BIT && TARGET_80387 && reload_completed" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && reload_completed" "fchs" [(set_attr "type" "fsgn") (set_attr "mode" "XF") @@ -9665,7 +9702,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (neg:XF (float_extend:XF (match_operand:DF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "fchs" [(set_attr "type" "fsgn") (set_attr "mode" "XF") @@ -9675,7 +9712,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (neg:XF (float_extend:XF (match_operand:SF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "fchs" [(set_attr "type" "fsgn") (set_attr "mode" "XF") @@ -9977,7 +10014,7 @@ [(parallel [(set (match_operand:XF 0 "nonimmediate_operand" "") (neg:XF (match_operand:XF 1 "nonimmediate_operand" ""))) (clobber (reg:CC 17))])] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "ix86_expand_unary_operator (ABS, XFmode, operands); DONE;") (define_expand "abstf2" @@ -9994,7 +10031,7 @@ [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f") (abs:XF (match_operand:XF 1 "nonimmediate_operand" "0,0"))) (clobber (reg:CC 17))] - "!TARGET_64BIT && TARGET_80387 + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && ix86_unary_operator_ok (ABS, XFmode, operands)" "#") @@ -10073,7 +10110,7 @@ (define_insn "*absxf2_1" [(set (match_operand:XF 0 "register_operand" "=f") (abs:XF (match_operand:XF 1 "register_operand" "0")))] - "!TARGET_64BIT && TARGET_80387 && reload_completed" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && reload_completed" "fabs" [(set_attr "type" "fsgn") (set_attr "mode" "DF")]) @@ -10082,7 +10119,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (abs:XF (float_extend:XF (match_operand:DF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "fabs" [(set_attr "type" "fsgn") (set_attr "mode" "XF")]) @@ -10091,7 +10128,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (abs:XF (float_extend:XF (match_operand:SF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "fabs" [(set_attr "type" "fsgn") (set_attr "mode" "XF")]) @@ -10151,17 +10188,19 @@ (set_attr "mode" "DI")]) (define_split - [(set (reg 17) - (compare (not:DI (match_operand:DI 1 "nonimmediate_operand" "")) - (const_int 0))) - (set (match_operand:DI 0 "nonimmediate_operand" "") - (not:DI (match_dup 1)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:DI (match_operand:DI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:DI 1 "nonimmediate_operand" "") + (not:DI (match_dup 3)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (xor:DI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 0) - (xor:DI (match_dup 1) (const_int -1)))])] + [(parallel [(set (match_dup 0) + (match_op_dup 2 + [(xor:DI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:DI (match_dup 3) (const_int -1)))])] "") (define_expand "one_cmplsi2" @@ -10200,17 +10239,18 @@ (set_attr "mode" "SI")]) (define_split - [(set (reg 17) - (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "")) - (const_int 0))) - (set (match_operand:SI 0 "nonimmediate_operand" "") - (not:SI (match_dup 1)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:SI (match_operand:SI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:SI 1 "nonimmediate_operand" "") + (not:SI (match_dup 3)))] "ix86_match_ccmode (insn, CCNOmode)" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (xor:SI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 0) - (xor:SI (match_dup 1) (const_int -1)))])] + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:SI (match_dup 3) (const_int -1)))])] "") ;; ??? Currently never generated - xor is used instead. @@ -10227,17 +10267,18 @@ (set_attr "mode" "SI")]) (define_split - [(set (reg 17) - (compare (not:SI (match_operand:SI 1 "register_operand" "")) - (const_int 0))) - (set (match_operand:DI 0 "register_operand" "") - (zero_extend:DI (not:SI (match_dup 1))))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:SI (match_operand:SI 3 "register_operand" "")) + (const_int 0)])) + (set (match_operand:DI 1 "register_operand" "") + (zero_extend:DI (not:SI (match_dup 3))))] "ix86_match_ccmode (insn, CCNOmode)" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (xor:SI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 0) - (zero_extend:DI (xor:SI (match_dup 1) (const_int -1))))])] + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])] "") (define_expand "one_cmplhi2" @@ -10267,17 +10308,18 @@ (set_attr "mode" "HI")]) (define_split - [(set (reg 17) - (compare (not:HI (match_operand:HI 1 "nonimmediate_operand" "")) - (const_int 0))) - (set (match_operand:HI 0 "nonimmediate_operand" "") - (not:HI (match_dup 1)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:HI (match_operand:HI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:HI 1 "nonimmediate_operand" "") + (not:HI (match_dup 3)))] "ix86_match_ccmode (insn, CCNOmode)" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (xor:HI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 0) - (xor:HI (match_dup 1) (const_int -1)))])] + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:HI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:HI (match_dup 3) (const_int -1)))])] "") ;; %%% Potential partial reg stall on alternative 1. What to do? @@ -10310,17 +10352,18 @@ (set_attr "mode" "QI")]) (define_split - [(set (reg 17) - (compare (not:QI (match_operand:QI 1 "nonimmediate_operand" "")) - (const_int 0))) - (set (match_operand:QI 0 "nonimmediate_operand" "") - (not:QI (match_dup 1)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:QI (match_operand:QI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:QI 1 "nonimmediate_operand" "") + (not:QI (match_dup 3)))] "ix86_match_ccmode (insn, CCNOmode)" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (xor:QI (match_dup 1) (const_int -1)) - (const_int 0))) - (set (match_dup 0) - (xor:QI (match_dup 1) (const_int -1)))])] + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:QI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:QI (match_dup 3) (const_int -1)))])] "") ;; Arithmetic shift instructions @@ -12426,10 +12469,10 @@ }) (define_expand "insv" - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "") - (match_operand:SI 1 "immediate_operand" "") - (match_operand:SI 2 "immediate_operand" "")) - (match_operand:SI 3 "register_operand" ""))] + [(set (zero_extract (match_operand 0 "ext_register_operand" "") + (match_operand 1 "immediate_operand" "") + (match_operand 2 "immediate_operand" "")) + (match_operand 3 "register_operand" ""))] "" { /* Handle extractions from %ah et al. */ @@ -12440,6 +12483,13 @@ matches the predicate, so check it again here. */ if (! register_operand (operands[0], VOIDmode)) FAIL; + + if (TARGET_64BIT) + emit_insn (gen_movdi_insv_1_rex64 (operands[0], operands[3])); + else + emit_insn (gen_movsi_insv_1 (operands[0], operands[3])); + + DONE; }) ;; %%% bts, btr, btc, bt. @@ -12839,9 +12889,9 @@ (set_attr "modrm" "0") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) + (const_int 128))) (const_int 2) (const_int 6)))]) @@ -12857,9 +12907,9 @@ (set_attr "modrm" "0") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) + (const_int 128))) (const_int 2) (const_int 6)))]) @@ -13124,9 +13174,9 @@ [(set_attr "type" "ibr") (set (attr "length") (if_then_else (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124))) + (const_int 128))) (const_int 2) (const_int 5))) (set_attr "modrm" "0")]) @@ -13250,9 +13300,9 @@ (set (attr "length") (if_then_else (and (eq_attr "alternative" "0") (and (ge (minus (match_dup 0) (pc)) - (const_int -128)) + (const_int -126)) (lt (minus (match_dup 0) (pc)) - (const_int 124)))) + (const_int 128)))) (const_int 2) (const_int 16))) ;; We don't know the type before shorten branches. Optimistically expect @@ -13616,11 +13666,10 @@ "ix86_expand_epilogue (0); DONE;") (define_expand "eh_return" - [(use (match_operand 0 "register_operand" "")) - (use (match_operand 1 "register_operand" ""))] + [(use (match_operand 0 "register_operand" ""))] "" { - rtx tmp, sa = operands[0], ra = operands[1]; + rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0]; /* Tricky bit: we write the address of the handler to which we will be returning into someone else's stack frame, one word below the @@ -13682,7 +13731,7 @@ (define_expand "ffssi2" [(set (match_operand:SI 0 "nonimmediate_operand" "") - (ffs:SI (match_operand:SI 1 "general_operand" "")))] + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))] "" { rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode); @@ -14059,7 +14108,7 @@ (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "%0") (match_operand:XF 2 "register_operand" "f")]))] - "!TARGET_64BIT && TARGET_80387 + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && GET_RTX_CLASS (GET_CODE (operands[3])) == 'c'" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -14317,7 +14366,7 @@ (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,f") (match_operand:XF 2 "register_operand" "f,0")]))] - "!TARGET_64BIT && TARGET_80387 + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && GET_RTX_CLASS (GET_CODE (operands[3])) != 'c'" "* return output_387_binary_op (insn, operands);" [(set (attr "type") @@ -14351,7 +14400,7 @@ (match_operator:XF 3 "binary_fp_operator" [(float:XF (match_operand:SI 1 "nonimmediate_operand" "m,?r")) (match_operand:XF 2 "register_operand" "0,0")]))] - "!TARGET_64BIT && TARGET_80387 && TARGET_USE_FIOP" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && TARGET_USE_FIOP" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14387,7 +14436,7 @@ (match_operator:XF 3 "binary_fp_operator" [(match_operand:XF 1 "register_operand" "0,0") (float:XF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))] - "!TARGET_64BIT && TARGET_80387 && TARGET_USE_FIOP" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && TARGET_USE_FIOP" "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14423,7 +14472,7 @@ (match_operator:XF 3 "binary_fp_operator" [(float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,0")) (match_operand:XF 2 "register_operand" "0,f")]))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14456,7 +14505,7 @@ [(match_operand:XF 1 "register_operand" "0,f") (float_extend:XF (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14489,7 +14538,7 @@ (match_operator:XF 3 "binary_fp_operator" [(float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,0")) (match_operand:XF 2 "register_operand" "0,f")]))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14522,7 +14571,7 @@ [(match_operand:XF 1 "register_operand" "0,f") (float_extend:XF (match_operand:DF 2 "nonimmediate_operand" "fm,0"))]))] - "!TARGET_64BIT && TARGET_80387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387" "* return output_387_binary_op (insn, operands);" [(set (attr "type") (cond [(match_operand:XF 3 "mult_operator" "") @@ -14687,7 +14736,7 @@ (define_insn "sqrtxf2" [(set (match_operand:XF 0 "register_operand" "=f") (sqrt:XF (match_operand:XF 1 "register_operand" "0")))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 && (TARGET_IEEE_FP || flag_unsafe_math_optimizations) " "fsqrt" [(set_attr "type" "fpspc") @@ -14708,7 +14757,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (sqrt:XF (float_extend:XF (match_operand:DF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && !TARGET_NO_FANCY_MATH_387" "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "XF") @@ -14728,7 +14777,7 @@ [(set (match_operand:XF 0 "register_operand" "=f") (sqrt:XF (float_extend:XF (match_operand:SF 1 "register_operand" "0"))))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && !TARGET_NO_FANCY_MATH_387" "fsqrt" [(set_attr "type" "fpspc") (set_attr "mode" "XF") @@ -14776,7 +14825,7 @@ (define_insn "sinxf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))] - "!TARGET_64BIT && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 + "!TARGET_128BIT_LONG_DOUBLE && TARGET_80387 && !TARGET_NO_FANCY_MATH_387 && flag_unsafe_math_optimizations" "fsin" [(set_attr "type" "fpspc") @@ -14823,7 +14872,7 @@ (define_insn "cosxf2" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))] - "! TARGET_NO_FANCY_MATH_387 && TARGET_80387 + "!TARGET_128BIT_LONG_DOUBLE && ! TARGET_NO_FANCY_MATH_387 && TARGET_80387 && flag_unsafe_math_optimizations" "fcos" [(set_attr "type" "fpspc") @@ -16056,7 +16105,7 @@ (if_then_else:XF (match_operand 1 "comparison_operator" "") (match_operand:XF 2 "register_operand" "") (match_operand:XF 3 "register_operand" "")))] - "!TARGET_64BIT && TARGET_CMOVE" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_CMOVE" "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") (define_expand "movtfcc" @@ -16073,7 +16122,7 @@ [(reg 17) (const_int 0)]) (match_operand:XF 2 "register_operand" "f,0") (match_operand:XF 3 "register_operand" "0,f")))] - "!TARGET_64BIT && TARGET_CMOVE" + "!TARGET_128BIT_LONG_DOUBLE && TARGET_CMOVE" "@ fcmov%F1\t{%2, %0|%0, %2} fcmov%f1\t{%3, %0|%0, %3}" @@ -16241,7 +16290,7 @@ && operands_match_p (operands[2], operands[3])))" [(set (reg:CCFP 17) (compare:CCFP (match_dup 2) - (match_dup 2))) + (match_dup 1))) (set (match_dup 0) (if_then_else:DF (ge (reg:CCFP 17) (const_int 0)) (match_dup 1) @@ -16637,10 +16686,12 @@ (set (subreg:TI (match_dup 0) 0) (ior:TI (subreg:TI (match_dup 6) 0) (subreg:TI (match_dup 7) 0)))] { - /* If op2 == op3, op3 will be clobbered before it is used. - This should be optimized out though. */ + /* If op2 == op3, op3 would be clobbered before it is used. */ if (operands_match_p (operands[2], operands[3])) - abort (); + { + emit_move_insn (operands[0], operands[2]); + DONE; + } PUT_MODE (operands[1], GET_MODE (operands[0])); if (operands_match_p (operands[0], operands[4])) operands[6] = operands[4], operands[7] = operands[2]; @@ -16734,7 +16785,7 @@ (define_split [(set (match_operand 0 "register_operand" "") (if_then_else (match_operator 1 "comparison_operator" - [(match_operand 4 "register_operand" "") + [(match_operand 4 "nonimmediate_operand" "") (match_operand 5 "nonimmediate_operand" "")]) (match_operand 2 "nonmemory_operand" "") (match_operand 3 "nonmemory_operand" "")))] @@ -16746,13 +16797,16 @@ (subreg:TI (match_dup 7) 0)))] { PUT_MODE (operands[1], GET_MODE (operands[0])); - if (!sse_comparison_operator (operands[1], VOIDmode)) + if (!sse_comparison_operator (operands[1], VOIDmode) + || !rtx_equal_p (operands[0], operands[4])) { rtx tmp = operands[5]; operands[5] = operands[4]; operands[4] = tmp; PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1]))); } + if (!rtx_equal_p (operands[0], operands[4])) + abort (); if (const0_operand (operands[2], GET_MODE (operands[0]))) { operands[7] = operands[3]; @@ -16853,51 +16907,61 @@ operands[2] = gen_lowpart (SImode, operands[2]); PUT_MODE (operands[3], SImode);") +; Promote the QImode tests, as i386 has encoding of the AND +; instruction with 32-bit sign-extended immediate and thus the +; instruction size is unchanged, except in the %eax case for +; which it is increased by one byte, hence the ! optimize_size. (define_split - [(set (reg 17) - (compare (and (match_operand 1 "aligned_operand" "") - (match_operand 2 "const_int_operand" "")) - (const_int 0))) - (set (match_operand 0 "register_operand" "") - (and (match_dup 1) (match_dup 2)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(and (match_operand 3 "aligned_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_int 0)])) + (set (match_operand 1 "register_operand" "") + (and (match_dup 3) (match_dup 4)))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ix86_match_ccmode (insn, CCNOmode) - && (GET_MODE (operands[0]) == HImode - || (GET_MODE (operands[0]) == QImode - /* Ensure that the operand will remain sign extended immediate. */ - && INTVAL (operands[2]) >= 0 - && (TARGET_PROMOTE_QImode || optimize_size)))" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO (and:SI (match_dup 1) (match_dup 2)) - (const_int 0))) - (set (match_dup 0) - (and:SI (match_dup 1) (match_dup 2)))])] - "operands[2] - = gen_int_mode (INTVAL (operands[2]) - & GET_MODE_MASK (GET_MODE (operands[0])), - SImode); - operands[0] = gen_lowpart (SImode, operands[0]); - operands[1] = gen_lowpart (SImode, operands[1]);") + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode) + && ! optimize_size + && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX) + || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4)) + (const_int 0)])) + (set (match_dup 1) + (and:SI (match_dup 3) (match_dup 4)))])] +{ + operands[4] + = gen_int_mode (INTVAL (operands[4]) + & GET_MODE_MASK (GET_MODE (operands[1])), SImode); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[3] = gen_lowpart (SImode, operands[3]); +}) -; Don't promote the QImode tests, as i386 don't have encoding of -; the test instruction with 32bit sign extended immediate and thus -; the code grows. +; Don't promote the QImode tests, as i386 doesn't have encoding of +; the TEST instruction with 32-bit sign-extended immediate and thus +; the instruction size would at least double, which is not what we +; want even with ! optimize_size. (define_split - [(set (reg 17) - (compare (and (match_operand:HI 0 "aligned_operand" "") - (match_operand:HI 1 "const_int_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand:HI 2 "aligned_operand" "") + (match_operand:HI 3 "const_int_operand" "")) + (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && reload_completed - && ix86_match_ccmode (insn, CCNOmode) - && GET_MODE (operands[0]) == HImode" - [(set (reg:CCNO 17) - (compare:CCNO (and:SI (match_dup 0) (match_dup 1)) - (const_int 0)))] - "operands[1] - = gen_int_mode (INTVAL (operands[1]) - & GET_MODE_MASK (GET_MODE (operands[0])), - SImode); - operands[0] = gen_lowpart (SImode, operands[0]);") + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode) + && ! TARGET_FAST_PREFIX + && ! optimize_size" + [(set (match_dup 0) + (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) + (const_int 0)]))] +{ + operands[3] + = gen_int_mode (INTVAL (operands[3]) + & GET_MODE_MASK (GET_MODE (operands[2])), SImode); + operands[2] = gen_lowpart (SImode, operands[2]); +}) (define_split [(set (match_operand 0 "register_operand" "") @@ -17070,13 +17134,14 @@ ;; Don't compare memory with zero, load and use a test instead. (define_peephole2 - [(set (reg 17) - (compare (match_operand:SI 0 "memory_operand" "") - (const_int 0))) + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(match_operand:SI 2 "memory_operand" "") + (const_int 0)])) (match_scratch:SI 3 "r")] "ix86_match_ccmode (insn, CCNOmode) && ! optimize_size" - [(set (match_dup 3) (match_dup 0)) - (set (reg:CCNO 17) (compare:CCNO (match_dup 3) (const_int 0)))] + [(set (match_dup 3) (match_dup 2)) + (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))] "") ;; NOT is not pairable on Pentium, while XOR is, but one byte longer. @@ -17140,77 +17205,77 @@ ;; versions if we're concerned about partial register stalls. (define_peephole2 - [(set (reg 17) - (compare (and:SI (match_operand:SI 0 "register_operand" "") - (match_operand:SI 1 "immediate_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:SI (match_operand:SI 2 "register_operand" "") + (match_operand:SI 3 "immediate_operand" "")) + (const_int 0)]))] "ix86_match_ccmode (insn, CCNOmode) - && (true_regnum (operands[0]) != 0 - || (GET_CODE (operands[1]) == CONST_INT - && CONST_OK_FOR_LETTER_P (INTVAL (operands[1]), 'K'))) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" + && (true_regnum (operands[2]) != 0 + || (GET_CODE (operands[3]) == CONST_INT + && CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'K'))) + && peep2_reg_dead_p (1, operands[2])" [(parallel - [(set (reg:CCNO 17) - (compare:CCNO (and:SI (match_dup 0) - (match_dup 1)) - (const_int 0))) - (set (match_dup 0) - (and:SI (match_dup 0) (match_dup 1)))])] + [(set (match_dup 0) + (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3)) + (const_int 0)])) + (set (match_dup 2) + (and:SI (match_dup 2) (match_dup 3)))])] "") ;; We don't need to handle HImode case, because it will be promoted to SImode ;; on ! TARGET_PARTIAL_REG_STALL (define_peephole2 - [(set (reg 17) - (compare (and:QI (match_operand:QI 0 "register_operand" "") - (match_operand:QI 1 "immediate_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:QI (match_operand:QI 2 "register_operand" "") + (match_operand:QI 3 "immediate_operand" "")) + (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[0]) != 0 - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" + && true_regnum (operands[2]) != 0 + && peep2_reg_dead_p (1, operands[2])" [(parallel - [(set (reg:CCNO 17) - (compare:CCNO (and:QI (match_dup 0) - (match_dup 1)) - (const_int 0))) - (set (match_dup 0) - (and:QI (match_dup 0) (match_dup 1)))])] + [(set (match_dup 0) + (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) + (const_int 0)])) + (set (match_dup 2) + (and:QI (match_dup 2) (match_dup 3)))])] "") (define_peephole2 - [(set (reg 17) - (compare - (and:SI - (zero_extract:SI - (match_operand 0 "ext_register_operand" "") - (const_int 8) - (const_int 8)) - (match_operand 1 "const_int_operand" "")) - (const_int 0)))] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:SI + (zero_extract:SI + (match_operand 2 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[0]) != 0 - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCNO 17) - (compare:CCNO - (and:SI - (zero_extract:SI - (match_dup 0) - (const_int 8) - (const_int 8)) - (match_dup 1)) - (const_int 0))) - (set (zero_extract:SI (match_dup 0) + && true_regnum (operands[2]) != 0 + && peep2_reg_dead_p (1, operands[2])" + [(parallel [(set (match_dup 0) + (match_op_dup 1 + [(and:SI + (zero_extract:SI + (match_dup 2) + (const_int 8) + (const_int 8)) + (match_dup 3)) + (const_int 0)])) + (set (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8)) (and:SI (zero_extract:SI - (match_dup 0) + (match_dup 2) (const_int 8) (const_int 8)) - (match_dup 1)))])] + (match_dup 3)))])] "") ;; Don't do logical operations with memory inputs. @@ -17512,66 +17577,20 @@ "") ;; Convert compares with 1 to shorter inc/dec operations when CF is not -;; required and register dies. +;; required and register dies. Similarly for 128 to plus -128. (define_peephole2 - [(set (reg 17) - (compare (match_operand:SI 0 "register_operand" "") - (match_operand:SI 1 "incdec_operand" "")))] - "ix86_match_ccmode (insn, CCGCmode) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCGC 17) - (compare:CCGC (match_dup 0) - (match_dup 1))) - (clobber (match_dup 0))])] - "") - -(define_peephole2 - [(set (reg 17) - (compare (match_operand:HI 0 "register_operand" "") - (match_operand:HI 1 "incdec_operand" "")))] - "ix86_match_ccmode (insn, CCGCmode) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCGC 17) - (compare:CCGC (match_dup 0) - (match_dup 1))) - (clobber (match_dup 0))])] - "") - -(define_peephole2 - [(set (reg 17) - (compare (match_operand:QI 0 "register_operand" "") - (match_operand:QI 1 "incdec_operand" "")))] - "ix86_match_ccmode (insn, CCGCmode) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCGC 17) - (compare:CCGC (match_dup 0) - (match_dup 1))) - (clobber (match_dup 0))])] - "") - -;; Convert compares with 128 to shorter add -128 -(define_peephole2 - [(set (reg 17) - (compare (match_operand:SI 0 "register_operand" "") - (const_int 128)))] - "ix86_match_ccmode (insn, CCGCmode) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCGC 17) - (compare:CCGC (match_dup 0) - (const_int 128))) - (clobber (match_dup 0))])] - "") - -(define_peephole2 - [(set (reg 17) - (compare (match_operand:HI 0 "register_operand" "") - (const_int 128)))] - "ix86_match_ccmode (insn, CCGCmode) - && find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" - [(parallel [(set (reg:CCGC 17) - (compare:CCGC (match_dup 0) - (const_int 128))) - (clobber (match_dup 0))])] + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(match_operand 2 "register_operand" "") + (match_operand 3 "const_int_operand" "")]))] + "(INTVAL (operands[3]) == -1 + || INTVAL (operands[3]) == 1 + || INTVAL (operands[3]) == 128) + && ix86_match_ccmode (insn, CCGCmode) + && peep2_reg_dead_p (1, operands[2])" + [(parallel [(set (match_dup 0) + (match_op_dup 1 [(match_dup 2) (match_dup 3)])) + (clobber (match_dup 2))])] "") (define_peephole2 @@ -17769,9 +17788,9 @@ return "call\t%P1"; } if (SIBLING_CALL_P (insn)) - return "jmp\t%*%1"; + return "jmp\t%A1"; else - return "call\t%*%1"; + return "call\t%A1"; } [(set_attr "type" "callv")]) @@ -17848,67 +17867,92 @@ ;; Moves for SSE/MMX regs. (define_insn "movv4sf_internal" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv4si_internal" - [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") - (match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv2di_internal" - [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m") - (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))] + [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movdqa\t{%1, %0|%0, %1}" + "@ + pxor\t%0, %0 + movdqa\t{%1, %0|%0, %1} + movdqa\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv8qi_internal" - [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") - (match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" + [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxmov") (set_attr "mode" "DI")]) (define_insn "movv4hi_internal" - [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m") - (match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" + [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V4HI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxmov") (set_attr "mode" "DI")]) (define_insn "movv2si_internal" - [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))] - "TARGET_MMX" - "movq\t{%1, %0|%0, %1}" + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V2SI 1 "vector_move_operand" "C,ym,y"))] + "TARGET_MMX + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) (define_insn "movv2sf_internal" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m") - (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))] - "TARGET_3DNOW" - "movq\\t{%1, %0|%0, %1}" + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m") + (match_operand:V2SF 1 "vector_move_operand" "C,ym,y"))] + "TARGET_3DNOW + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1}" [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) (define_expand "movti" - [(set (match_operand:TI 0 "general_operand" "") - (match_operand:TI 1 "general_operand" ""))] + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] "TARGET_SSE || TARGET_64BIT" { if (TARGET_64BIT) @@ -17919,35 +17963,44 @@ }) (define_insn "movv2df_internal" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movapd\t{%1, %0|%0, %1}" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorpd\t%0, %0 + movapd\t{%1, %0|%0, %1} + movapd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V2DF")]) (define_insn "movv8hi_internal" - [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") - (match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_insn "movv16qi_internal" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))] - "TARGET_SSE2" - ;; @@@ let's try to use movaps here. - "movaps\t{%1, %0|%0, %1}" + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + xorps\t%0, %0 + movaps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) (define_expand "movv2df" - [(set (match_operand:V2DF 0 "general_operand" "") - (match_operand:V2DF 1 "general_operand" ""))] + [(set (match_operand:V2DF 0 "nonimmediate_operand" "") + (match_operand:V2DF 1 "nonimmediate_operand" ""))] "TARGET_SSE2" { ix86_expand_vector_move (V2DFmode, operands); @@ -17955,8 +18008,8 @@ }) (define_expand "movv8hi" - [(set (match_operand:V8HI 0 "general_operand" "") - (match_operand:V8HI 1 "general_operand" ""))] + [(set (match_operand:V8HI 0 "nonimmediate_operand" "") + (match_operand:V8HI 1 "nonimmediate_operand" ""))] "TARGET_SSE2" { ix86_expand_vector_move (V8HImode, operands); @@ -17964,8 +18017,8 @@ }) (define_expand "movv16qi" - [(set (match_operand:V16QI 0 "general_operand" "") - (match_operand:V16QI 1 "general_operand" ""))] + [(set (match_operand:V16QI 0 "nonimmediate_operand" "") + (match_operand:V16QI 1 "nonimmediate_operand" ""))] "TARGET_SSE2" { ix86_expand_vector_move (V16QImode, operands); @@ -17973,8 +18026,8 @@ }) (define_expand "movv4sf" - [(set (match_operand:V4SF 0 "general_operand" "") - (match_operand:V4SF 1 "general_operand" ""))] + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (match_operand:V4SF 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V4SFmode, operands); @@ -17982,8 +18035,8 @@ }) (define_expand "movv4si" - [(set (match_operand:V4SI 0 "general_operand" "") - (match_operand:V4SI 1 "general_operand" ""))] + [(set (match_operand:V4SI 0 "nonimmediate_operand" "") + (match_operand:V4SI 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V4SImode, operands); @@ -17991,8 +18044,8 @@ }) (define_expand "movv2di" - [(set (match_operand:V2DI 0 "general_operand" "") - (match_operand:V2DI 1 "general_operand" ""))] + [(set (match_operand:V2DI 0 "nonimmediate_operand" "") + (match_operand:V2DI 1 "nonimmediate_operand" ""))] "TARGET_SSE" { ix86_expand_vector_move (V2DImode, operands); @@ -18000,8 +18053,8 @@ }) (define_expand "movv2si" - [(set (match_operand:V2SI 0 "general_operand" "") - (match_operand:V2SI 1 "general_operand" ""))] + [(set (match_operand:V2SI 0 "nonimmediate_operand" "") + (match_operand:V2SI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V2SImode, operands); @@ -18009,8 +18062,8 @@ }) (define_expand "movv4hi" - [(set (match_operand:V4HI 0 "general_operand" "") - (match_operand:V4HI 1 "general_operand" ""))] + [(set (match_operand:V4HI 0 "nonimmediate_operand" "") + (match_operand:V4HI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V4HImode, operands); @@ -18018,8 +18071,8 @@ }) (define_expand "movv8qi" - [(set (match_operand:V8QI 0 "general_operand" "") - (match_operand:V8QI 1 "general_operand" ""))] + [(set (match_operand:V8QI 0 "nonimmediate_operand" "") + (match_operand:V8QI 1 "nonimmediate_operand" ""))] "TARGET_MMX" { ix86_expand_vector_move (V8QImode, operands); @@ -18027,14 +18080,97 @@ }) (define_expand "movv2sf" - [(set (match_operand:V2SF 0 "general_operand" "") - (match_operand:V2SF 1 "general_operand" ""))] + [(set (match_operand:V2SF 0 "nonimmediate_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" ""))] "TARGET_3DNOW" { ix86_expand_vector_move (V2SFmode, operands); DONE; }) +(define_insn "*pushv2df" + [(set (match_operand:V2DF 0 "push_operand" "=<") + (match_operand:V2DF 1 "register_operand" "x"))] + "TARGET_SSE" + "#") + +(define_insn "*pushv2di" + [(set (match_operand:V2DI 0 "push_operand" "=<") + (match_operand:V2DI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv8hi" + [(set (match_operand:V8HI 0 "push_operand" "=<") + (match_operand:V8HI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv16qi" + [(set (match_operand:V16QI 0 "push_operand" "=<") + (match_operand:V16QI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv4sf" + [(set (match_operand:V4SF 0 "push_operand" "=<") + (match_operand:V4SF 1 "register_operand" "x"))] + "TARGET_SSE" + "#") + +(define_insn "*pushv4si" + [(set (match_operand:V4SI 0 "push_operand" "=<") + (match_operand:V4SI 1 "register_operand" "x"))] + "TARGET_SSE2" + "#") + +(define_insn "*pushv2si" + [(set (match_operand:V2SI 0 "push_operand" "=<") + (match_operand:V2SI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv4hi" + [(set (match_operand:V4HI 0 "push_operand" "=<") + (match_operand:V4HI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv8qi" + [(set (match_operand:V8QI 0 "push_operand" "=<") + (match_operand:V8QI 1 "register_operand" "y"))] + "TARGET_MMX" + "#") + +(define_insn "*pushv2sf" + [(set (match_operand:V2SF 0 "push_operand" "=<") + (match_operand:V2SF 1 "register_operand" "y"))] + "TARGET_3DNOW" + "#") + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "register_operand" ""))] + "!TARGET_64BIT && reload_completed + && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 3))) + (set (match_dup 2) (match_dup 1))] + "operands[2] = change_address (operands[0], GET_MODE (operands[0]), + stack_pointer_rtx); + operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "register_operand" ""))] + "TARGET_64BIT && reload_completed + && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))" + [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 3))) + (set (match_dup 2) (match_dup 1))] + "operands[2] = change_address (operands[0], GET_MODE (operands[0]), + stack_pointer_rtx); + operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));") + + (define_insn_and_split "*pushti" [(set (match_operand:TI 0 "push_operand" "=<") (match_operand:TI 1 "nonmemory_operand" "x"))] @@ -18158,8 +18294,9 @@ (define_insn "movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") - (match_operand:TI 1 "general_operand" "C,xm,x"))] - "TARGET_SSE && !TARGET_64BIT" + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ xorps\t%0, %0 movaps\t{%1, %0|%0, %1} @@ -18169,7 +18306,7 @@ (define_insn "*movti_rex64" [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x") - (match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))] + (match_operand:TI 1 "general_operand" "riFo,riF,C,x,m"))] "TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ @@ -18191,29 +18328,56 @@ ;; These two patterns are useful for specifying exactly whether to use ;; movaps or movups -(define_insn "sse_movaps" +(define_expand "sse_movaps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] + UNSPEC_MOVA))] + "TARGET_SSE" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_movaps (tmp, operands[1])); + emit_move_insn (operands[0], tmp); + DONE; + } +}) + +(define_insn "*sse_movaps_1" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVA))] - "TARGET_SSE" - "@ - movaps\t{%1, %0|%0, %1} - movaps\t{%1, %0|%0, %1}" + "TARGET_SSE + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movaps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov,ssemov") (set_attr "mode" "V4SF")]) -(define_insn "sse_movups" +(define_expand "sse_movups" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")] + UNSPEC_MOVU))] + "TARGET_SSE" +{ + if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM) + { + rtx tmp = gen_reg_rtx (V4SFmode); + emit_insn (gen_sse_movups (tmp, operands[1])); + emit_move_insn (operands[0], tmp); + DONE; + } +}) + +(define_insn "*sse_movups_1" [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] - "TARGET_SSE" - "@ - movups\t{%1, %0|%0, %1} - movups\t{%1, %0|%0, %1}" + "TARGET_SSE + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movups\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt,ssecvt") (set_attr "mode" "V4SF")]) - ;; SSE Strange Moves. (define_insn "sse_movmskps" @@ -18329,11 +18493,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V4SF")]) -(define_insn "sse_loadss" +(define_expand "sse_loadss" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:SF 1 "memory_operand" "")] + "TARGET_SSE" +{ + emit_insn (gen_sse_loadss_1 (operands[0], operands[1], + CONST0_RTX (V4SFmode))); + DONE; +}) + +(define_insn "sse_loadss_1" [(set (match_operand:V4SF 0 "register_operand" "=x") (vec_merge:V4SF - (match_operand:V4SF 1 "memory_operand" "m") - (vec_duplicate:V4SF (float:SF (const_int 0))) + (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m")) + (match_operand:V4SF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE" "movss\t{%1, %0|%0, %1}" @@ -18804,7 +18978,7 @@ (define_insn "sse2_nandv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") - (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")) + (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0")) (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2 && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)" @@ -18908,7 +19082,7 @@ (match_operator:V4SI 3 "sse_comparison_operator" [(match_operand:V4SF 1 "register_operand" "0") (match_operand:V4SF 2 "register_operand" "x")]) - (match_dup 1) + (subreg:V4SI (match_dup 1) 0) (const_int 1)))] "TARGET_SSE" "cmp%D3ss\t{%2, %0|%0, %2}" @@ -19093,6 +19267,19 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvtsi2ssq" + [(set (match_operand:V4SF 0 "register_operand" "=x,x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0,0") + (vec_duplicate:V4SF + (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 14)))] + "TARGET_SSE && TARGET_64BIT" + "cvtsi2ssq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,vector") + (set_attr "mode" "SF")]) + (define_insn "cvtss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI @@ -19103,6 +19290,17 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvtss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m")) + (parallel [(const_int 0)])))] + "TARGET_SSE" + "cvtss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,vector") + (set_attr "mode" "SF")]) + (define_insn "cvttss2si" [(set (match_operand:SI 0 "register_operand" "=r") (vec_select:SI @@ -19114,6 +19312,18 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SF")]) +(define_insn "cvttss2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (vec_select:DI + (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")] + UNSPEC_FIX) + (parallel [(const_int 0)])))] + "TARGET_SSE && TARGET_64BIT" + "cvttss2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SF") + (set_attr "athlon_decode" "vector,vector")]) + ;; MMX insns @@ -19121,7 +19331,7 @@ (define_insn "addv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddb\t{%2, %0|%0, %2}" @@ -19130,7 +19340,7 @@ (define_insn "addv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddw\t{%2, %0|%0, %2}" @@ -19139,16 +19349,27 @@ (define_insn "addv2si3" [(set (match_operand:V2SI 0 "register_operand" "=y") - (plus:V2SI (match_operand:V2SI 1 "register_operand" "0") + (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0") (match_operand:V2SI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddd\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_insn "mmx_adddi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(plus:DI (match_operand:DI 1 "register_operand" "%0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "paddq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + (define_insn "ssaddv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddsb\t{%2, %0|%0, %2}" @@ -19157,7 +19378,7 @@ (define_insn "ssaddv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddsw\t{%2, %0|%0, %2}" @@ -19166,7 +19387,7 @@ (define_insn "usaddv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") - (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0") + (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0") (match_operand:V8QI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddusb\t{%2, %0|%0, %2}" @@ -19175,7 +19396,7 @@ (define_insn "usaddv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") - (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0") + (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0") (match_operand:V4HI 2 "nonimmediate_operand" "ym")))] "TARGET_MMX" "paddusw\t{%2, %0|%0, %2}" @@ -19209,6 +19430,17 @@ [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_insn "mmx_subdi3" + [(set (match_operand:DI 0 "register_operand" "=y") + (unspec:DI + [(minus:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "ym"))] + UNSPEC_NOP))] + "TARGET_MMX" + "psubq\t{%2, %0|%0, %2}" + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + (define_insn "sssubv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0") @@ -19312,7 +19544,7 @@ (define_insn "mmx_iordi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(ior:DI (match_operand:DI 1 "register_operand" "0") + [(ior:DI (match_operand:DI 1 "register_operand" "%0") (match_operand:DI 2 "nonimmediate_operand" "ym"))] UNSPEC_NOP))] "TARGET_MMX" @@ -19323,7 +19555,7 @@ (define_insn "mmx_xordi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(xor:DI (match_operand:DI 1 "register_operand" "0") + [(xor:DI (match_operand:DI 1 "register_operand" "%0") (match_operand:DI 2 "nonimmediate_operand" "ym"))] UNSPEC_NOP))] "TARGET_MMX" @@ -19346,7 +19578,7 @@ (define_insn "mmx_anddi3" [(set (match_operand:DI 0 "register_operand" "=y") (unspec:DI - [(and:DI (match_operand:DI 1 "register_operand" "0") + [(and:DI (match_operand:DI 1 "register_operand" "%0") (match_operand:DI 2 "nonimmediate_operand" "ym"))] UNSPEC_NOP))] "TARGET_MMX" @@ -19805,17 +20037,17 @@ (define_insn "ldmxcsr" [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] UNSPECV_LDMXCSR)] - "TARGET_MMX" + "TARGET_SSE" "ldmxcsr\t%0" - [(set_attr "type" "mmx") + [(set_attr "type" "sse") (set_attr "memory" "load")]) (define_insn "stmxcsr" [(set (match_operand:SI 0 "memory_operand" "=m") (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] - "TARGET_MMX" + "TARGET_SSE" "stmxcsr\t%0" - [(set_attr "type" "mmx") + [(set_attr "type" "sse") (set_attr "memory" "store")]) (define_expand "sfence" @@ -20471,7 +20703,7 @@ (match_operator:V2DI 3 "sse_comparison_operator" [(match_operand:V2DF 1 "register_operand" "0") (match_operand:V2DF 2 "nonimmediate_operand" "x")]) - (match_dup 1) + (subreg:V2DI (match_dup 1) 0) (const_int 1)))] "TARGET_SSE2" "cmp%D3sd\t{%2, %0|%0, %2}" @@ -20692,6 +20924,15 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SI")]) +(define_insn "cvtsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r") + (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") + (parallel [(const_int 0)]))))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "SI")]) + (define_insn "cvttsd2si" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm") @@ -20701,6 +20942,16 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "SI")]) +(define_insn "cvttsd2siq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm") + (parallel [(const_int 0)]))] UNSPEC_FIX))] + "TARGET_SSE2 && TARGET_64BIT" + "cvttsd2siq\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector,vector")]) + (define_insn "cvtsi2sd" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0") @@ -20713,6 +20964,19 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) +(define_insn "cvtsi2sdq" + [(set (match_operand:V2DF 0 "register_operand" "=x,x") + (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0") + (vec_duplicate:V2DF + (float:DF + (match_operand:DI 2 "nonimmediate_operand" "r,rm"))) + (const_int 2)))] + "TARGET_SSE2 && TARGET_64BIT" + "cvtsi2sdq\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF") + (set_attr "athlon_decode" "vector,direct")]) + ;; Conversions between SF and DF (define_insn "cvtsd2ss" @@ -20770,7 +21034,7 @@ (define_insn "addv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x") - (plus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddb\t{%2, %0|%0, %2}" @@ -20779,7 +21043,7 @@ (define_insn "addv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") - (plus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddw\t{%2, %0|%0, %2}" @@ -20788,7 +21052,7 @@ (define_insn "addv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") - (plus:V4SI (match_operand:V4SI 1 "register_operand" "0") + (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0") (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddd\t{%2, %0|%0, %2}" @@ -20797,7 +21061,7 @@ (define_insn "addv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") - (plus:V2DI (match_operand:V2DI 1 "register_operand" "0") + (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0") (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddq\t{%2, %0|%0, %2}" @@ -20806,7 +21070,7 @@ (define_insn "ssaddv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x") - (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddsb\t{%2, %0|%0, %2}" @@ -20815,7 +21079,7 @@ (define_insn "ssaddv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") - (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddsw\t{%2, %0|%0, %2}" @@ -20824,7 +21088,7 @@ (define_insn "usaddv16qi3" [(set (match_operand:V16QI 0 "register_operand" "=x") - (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "0") + (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0") (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddusb\t{%2, %0|%0, %2}" @@ -20833,7 +21097,7 @@ (define_insn "usaddv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") - (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "0") + (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0") (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] "TARGET_SSE2" "paddusw\t{%2, %0|%0, %2}" @@ -21069,7 +21333,8 @@ [(set (match_operand:V8HI 0 "register_operand" "=x") (vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0") (vec_duplicate:V8HI - (match_operand:SI 2 "nonimmediate_operand" "rm")) + (truncate:HI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) (match_operand:SI 3 "immediate_operand" "i")))] "TARGET_SSE2" "pinsrw\t{%3, %2, %0|%0, %2, %3}" @@ -21218,7 +21483,7 @@ (define_insn "ashrv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psraw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21227,7 +21492,7 @@ (define_insn "ashrv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrad\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21236,7 +21501,7 @@ (define_insn "lshrv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrlw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21245,7 +21510,7 @@ (define_insn "lshrv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrld\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21254,7 +21519,7 @@ (define_insn "lshrv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psrlq\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21263,7 +21528,7 @@ (define_insn "ashlv8hi3" [(set (match_operand:V8HI 0 "register_operand" "=x") (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psllw\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21272,7 +21537,7 @@ (define_insn "ashlv4si3" [(set (match_operand:V4SI 0 "register_operand" "=x") (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "pslld\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21281,7 +21546,7 @@ (define_insn "ashlv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0") - (match_operand:SI 2 "nonmemory_operand" "xi")))] + (match_operand:TI 2 "nonmemory_operand" "xi")))] "TARGET_SSE2" "psllq\t{%2, %0|%0, %2}" [(set_attr "type" "sseishft") @@ -21595,45 +21860,41 @@ (define_insn "sse2_movapd" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")] + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVA))] - "TARGET_SSE2" - "@ - movapd\t{%1, %0|%0, %1} - movapd\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movapd\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V2DF")]) (define_insn "sse2_movupd" [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")] + (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] - "TARGET_SSE2" - "@ - movupd\t{%1, %0|%0, %1} - movupd\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movupd\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) (define_insn "sse2_movdqa" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")] + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVA))] - "TARGET_SSE2" - "@ - movdqa\t{%1, %0|%0, %1} - movdqa\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movdqa\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "TI")]) (define_insn "sse2_movdqu" [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") - (unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")] + (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] - "TARGET_SSE2" - "@ - movdqu\t{%1, %0|%0, %1} - movdqu\t{%1, %0|%0, %1}" + "TARGET_SSE2 + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "movdqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) @@ -21641,24 +21902,48 @@ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y") (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x") (parallel [(const_int 0)])))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_64BIT" "@ movq\t{%1, %0|%0, %1} movdq2q\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) +(define_insn "sse2_movdq2q_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r") + (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x") + (parallel [(const_int 0)])))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + (define_insn "sse2_movq2dq" [(set (match_operand:V2DI 0 "register_operand" "=x,?x") (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y") (const_int 0)))] - "TARGET_SSE2" + "TARGET_SSE2 && !TARGET_64BIT" "@ movq\t{%1, %0|%0, %1} movq2dq\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt,ssemov") (set_attr "mode" "TI")]) +(define_insn "sse2_movq2dq_rex64" + [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x") + (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r") + (const_int 0)))] + "TARGET_SSE2 && TARGET_64BIT" + "@ + movq\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt,ssemov,ssecvt") + (set_attr "mode" "TI")]) + (define_insn "sse2_movq" [(set (match_operand:V2DI 0 "register_operand" "=x") (vec_concat:V2DI (vec_select:DI @@ -21673,7 +21958,7 @@ (define_insn "sse2_loadd" [(set (match_operand:V4SI 0 "register_operand" "=x") (vec_merge:V4SI - (vec_duplicate:V4HI (match_operand:SI 1 "nonimmediate_operand" "mr")) + (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr")) (const_vector:V4SI [(const_int 0) (const_int 0) (const_int 0) @@ -21716,11 +22001,21 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) -(define_insn "sse2_loadsd" +(define_expand "sse2_loadsd" + [(match_operand:V2DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")] + "TARGET_SSE2" +{ + emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1], + CONST0_RTX (V2DFmode))); + DONE; +}) + +(define_insn "sse2_loadsd_1" [(set (match_operand:V2DF 0 "register_operand" "=x") (vec_merge:V2DF - (match_operand:DF 1 "memory_operand" "m") - (vec_duplicate:DF (float:DF (const_int 0))) + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")) + (match_operand:V2DF 2 "const0_operand" "X") (const_int 1)))] "TARGET_SSE2" "movsd\t{%1, %0|%0, %1}" @@ -21801,3 +22096,129 @@ "lfence" [(set_attr "type" "sse") (set_attr "memory" "unknown")]) + +;; SSE3 + +(define_insn "mwait" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") + (match_operand:SI 1 "register_operand" "c")] + UNSPECV_MWAIT)] + "TARGET_SSE3" + "mwait\t%0, %1" + [(set_attr "length" "3")]) + +(define_insn "monitor" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") + (match_operand:SI 1 "register_operand" "c") + (match_operand:SI 2 "register_operand" "d")] + UNSPECV_MONITOR)] + "TARGET_SSE3" + "monitor\t%0, %1, %2" + [(set_attr "length" "3")]) + +;; SSE3 arithmetic + +(define_insn "addsubv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_ADDSUB))] + "TARGET_SSE3" + "addsubps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "addsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_ADDSUB))] + "TARGET_SSE3" + "addsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "haddv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_HADD))] + "TARGET_SSE3" + "haddps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "haddv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_HADD))] + "TARGET_SSE3" + "haddpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "hsubv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_HSUB))] + "TARGET_SSE3" + "hsubps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "hsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_HSUB))] + "TARGET_SSE3" + "hsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "movshdup" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))] + "TARGET_SSE3" + "movshdup\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "movsldup" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))] + "TARGET_SSE3" + "movsldup\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "lddqu" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] + UNSPEC_LDQQU))] + "TARGET_SSE3" + "lddqu\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "loadddup" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))] + "TARGET_SSE3" + "movddup\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "movddup" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_duplicate:V2DF + (vec_select:DF (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_SSE3" + "movddup\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) |