diff options
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- | gcc/config/i386/i386.md | 694 |
1 files changed, 354 insertions, 340 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f4d33c59551..933faf847e8 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -796,7 +796,7 @@ sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx, avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq, - avx512vl,noavx512vl" + avx512vl,noavx512vl,x64_avx512dq" (const_string "base")) (define_attr "enabled" "" @@ -807,6 +807,8 @@ (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX") (eq_attr "isa" "x64_avx") (symbol_ref "TARGET_64BIT && TARGET_AVX") + (eq_attr "isa" "x64_avx512dq") + (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ") (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT") (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2") (eq_attr "isa" "sse2_noavx") @@ -1024,6 +1026,9 @@ (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")]) (define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti")]) +;; LEA mode corresponding to an integer mode +(define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")]) + ;; Half mode for double word integer modes. (define_mode_iterator DWIH [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")]) @@ -1065,7 +1070,7 @@ (define_mode_attr g [(QI "qmn") (HI "rmn") (SI "rme") (DI "rme")]) ;; Immediate operand constraint for double integer modes. -(define_mode_attr di [(SI "nF") (DI "e")]) +(define_mode_attr di [(SI "nF") (DI "Wd")]) ;; Immediate operand constraint for shifts. (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")]) @@ -1078,6 +1083,15 @@ (DI "x86_64_general_operand") (TI "x86_64_general_operand")]) +;; General operand predicate for integer modes, where for TImode +;; we need both words of the operand to be general operands. +(define_mode_attr general_hilo_operand + [(QI "general_operand") + (HI "general_operand") + (SI "x86_64_general_operand") + (DI "x86_64_general_operand") + (TI "x86_64_hilo_general_operand")]) + ;; General sign extend operand predicate for integer modes, ;; which disallows VOIDmode operands and thus it is suitable ;; for use inside sign_extend. @@ -1156,6 +1170,10 @@ (define_mode_attr ssevecmodelower [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")]) +;; AVX512F vector mode corresponding to a scalar mode +(define_mode_attr avx512fvecmode + [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF "V8DF")]) + ;; Instruction suffix for REX 64bit operators. (define_mode_attr rex64suffix [(SI "") (DI "{q}")]) @@ -1665,12 +1683,13 @@ (define_mode_iterator FPCMP [CCFP CCFPU]) (define_mode_attr unord [(CCFP "") (CCFPU "u")]) -(define_insn "*cmpi<FPCMP:unord><MODEF:mode>_mixed" +(define_insn "*cmpi<FPCMP:unord><MODEF:mode>" [(set (reg:FPCMP FLAGS_REG) (compare:FPCMP (match_operand:MODEF 0 "register_operand" "f,v") - (match_operand:MODEF 1 "nonimmediate_operand" "f,vm")))] - "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH" + (match_operand:MODEF 1 "register_ssemem_operand" "f,vm")))] + "(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH) + || (TARGET_80387 && TARGET_CMOVE)" "* return output_fp_compare (insn, operands, true, <FPCMP:MODE>mode == CCFPUmode);" [(set_attr "type" "fcmp,ssecomi") @@ -1689,22 +1708,27 @@ (set_attr "bdver1_decode" "double") (set_attr "znver1_decode" "double") (set (attr "enabled") - (cond [(eq_attr "alternative" "0") - (symbol_ref "TARGET_MIX_SSE_I387") - ] - (symbol_ref "true")))]) + (if_then_else + (match_test ("SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH")) + (if_then_else + (eq_attr "alternative" "0") + (symbol_ref "TARGET_MIX_SSE_I387") + (symbol_ref "true")) + (if_then_else + (eq_attr "alternative" "0") + (symbol_ref "true") + (symbol_ref "false"))))]) -(define_insn "*cmpi<FPCMP:unord><X87MODEF:mode>_i387" +(define_insn "*cmpi<unord>xf_i387" [(set (reg:FPCMP FLAGS_REG) (compare:FPCMP - (match_operand:X87MODEF 0 "register_operand" "f") - (match_operand:X87MODEF 1 "register_operand" "f")))] - "TARGET_80387 && TARGET_CMOVE - && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)" + (match_operand:XF 0 "register_operand" "f") + (match_operand:XF 1 "register_operand" "f")))] + "TARGET_80387 && TARGET_CMOVE" "* return output_fp_compare (insn, operands, true, - <FPCMP:MODE>mode == CCFPUmode);" + <MODE>mode == CCFPUmode);" [(set_attr "type" "fcmp") - (set_attr "mode" "<X87MODEF:MODE>") + (set_attr "mode" "XF") (set_attr "athlon_decode" "vector") (set_attr "amdfam10_decode" "direct") (set_attr "bdver1_decode" "double") @@ -2681,34 +2705,31 @@ (set_attr "amdfam10_decode" "double") (set_attr "bdver1_decode" "double")]) -(define_insn "*swap<mode>_1" - [(set (match_operand:SWI12 0 "register_operand" "+r") - (match_operand:SWI12 1 "register_operand" "+r")) +(define_insn "*swap<mode>" + [(set (match_operand:SWI12 0 "register_operand" "+<r>,r") + (match_operand:SWI12 1 "register_operand" "+<r>,r")) (set (match_dup 1) (match_dup 0))] - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" - "xchg{l}\t%k1, %k0" + "" + "@ + xchg{<imodesuffix>}\t%1, %0 + xchg{l}\t%k1, %k0" [(set_attr "type" "imov") - (set_attr "mode" "SI") + (set_attr "mode" "<MODE>,SI") + (set (attr "preferred_for_size") + (cond [(eq_attr "alternative" "0") + (symbol_ref "false")] + (symbol_ref "true"))) + ;; Potential partial reg stall on alternative 1. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true"))) (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") (set_attr "amdfam10_decode" "double") (set_attr "bdver1_decode" "double")]) -;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL -;; is disabled for AMDFAM10 -(define_insn "*swap<mode>_2" - [(set (match_operand:SWI12 0 "register_operand" "+<r>") - (match_operand:SWI12 1 "register_operand" "+<r>")) - (set (match_dup 1) - (match_dup 0))] - "TARGET_PARTIAL_REG_STALL" - "xchg{<imodesuffix>}\t%1, %0" - [(set_attr "type" "imov") - (set_attr "mode" "<MODE>") - (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector")]) - (define_expand "movstrict<mode>" [(set (strict_low_part (match_operand:SWI12 0 "nonimmediate_operand")) (match_operand:SWI12 1 "general_operand"))] @@ -2913,7 +2934,7 @@ (define_insn "*pushtf" [(set (match_operand:TF 0 "push_operand" "=<,<") - (match_operand:TF 1 "general_no_elim_operand" "x,*roF"))] + (match_operand:TF 1 "general_no_elim_operand" "v,*roF"))] "TARGET_64BIT || TARGET_SSE" { /* This insn should be already split before reg-stack. */ @@ -3057,14 +3078,10 @@ (define_split [(set (match_operand:SF 0 "push_operand") (match_operand:SF 1 "memory_operand"))] - "reload_completed" + "reload_completed + && find_constant_src (insn)" [(set (match_dup 0) (match_dup 2))] -{ - operands[2] = find_constant_src (curr_insn); - - if (operands[2] == NULL_RTX) - FAIL; -}) + "operands[2] = find_constant_src (curr_insn);") (define_split [(set (match_operand 0 "push_operand") @@ -3092,14 +3109,14 @@ "ix86_expand_move (<MODE>mode, operands); DONE;") (define_insn "*movtf_internal" - [(set (match_operand:TF 0 "nonimmediate_operand" "=x,x ,m,?*r ,!o") - (match_operand:TF 1 "general_operand" "C ,xm,x,*roF,*rC"))] + [(set (match_operand:TF 0 "nonimmediate_operand" "=v,v ,m,?*r ,!o") + (match_operand:TF 1 "general_operand" "C ,vm,v,*roF,*rC"))] "(TARGET_64BIT || TARGET_SSE) && !(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || !CONST_DOUBLE_P (operands[1]) - || (optimize_function_for_size_p (cfun) + || ((optimize_function_for_size_p (cfun) + || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)) && standard_sse_constant_p (operands[1], TFmode) == 1 && !memory_operand (operands[0], TFmode)) || (!TARGET_MEMORY_MISMATCH_STALL @@ -3118,6 +3135,10 @@ { if (get_attr_mode (insn) == MODE_V4SF) return "%vmovups\t{%1, %0|%0, %1}"; + else if (TARGET_AVX512VL + && (EXT_REX_SSE_REG_P (operands[0]) + || EXT_REX_SSE_REG_P (operands[1]))) + return "vmovdqu64\t{%1, %0|%0, %1}"; else return "%vmovdqu\t{%1, %0|%0, %1}"; } @@ -3125,6 +3146,10 @@ { if (get_attr_mode (insn) == MODE_V4SF) return "%vmovaps\t{%1, %0|%0, %1}"; + else if (TARGET_AVX512VL + && (EXT_REX_SSE_REG_P (operands[0]) + || EXT_REX_SSE_REG_P (operands[1]))) + return "vmovdqa64\t{%1, %0|%0, %1}"; else return "%vmovdqa\t{%1, %0|%0, %1}"; } @@ -3175,9 +3200,9 @@ "fm,f,G,roF,r , *roF,*r,F ,C,roF,rF"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || !CONST_DOUBLE_P (operands[1]) - || (optimize_function_for_size_p (cfun) + || ((optimize_function_for_size_p (cfun) + || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)) && standard_80387_constant_p (operands[1]) > 0 && !memory_operand (operands[0], XFmode)) || (!TARGET_MEMORY_MISMATCH_STALL @@ -3248,9 +3273,9 @@ "Yf*fm,Yf*f,G ,roF,r ,*roF,*r,F ,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,Yj,r ,roF,rF,rmF,rC"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || !CONST_DOUBLE_P (operands[1]) - || (optimize_function_for_size_p (cfun) + || ((optimize_function_for_size_p (cfun) + || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)) && ((!(TARGET_SSE2 && TARGET_SSE_MATH) && standard_80387_constant_p (operands[1]) > 0) || (TARGET_SSE2 && TARGET_SSE_MATH @@ -3450,9 +3475,9 @@ "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,Yj,r ,*y ,m ,*y,*Yn,r ,rmF,rF"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (!can_create_pseudo_p () - || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || !CONST_DOUBLE_P (operands[1]) - || (optimize_function_for_size_p (cfun) + || ((optimize_function_for_size_p (cfun) + || (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)) && ((!TARGET_SSE_MATH && standard_80387_constant_p (operands[1]) > 0) || (TARGET_SSE_MATH @@ -3586,19 +3611,10 @@ && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == DFmode - || GET_MODE (operands[0]) == SFmode)" + || GET_MODE (operands[0]) == SFmode) + && ix86_standard_x87sse_constant_load_p (insn, operands[0])" [(set (match_dup 0) (match_dup 2))] -{ - operands[2] = find_constant_src (curr_insn); - - if (operands[2] == NULL_RTX - || (SSE_REGNO_P (REGNO (operands[0])) - && standard_sse_constant_p (operands[2], - GET_MODE (operands[0])) != 1) - || (STACK_REGNO_P (REGNO (operands[0])) - && standard_80387_constant_p (operands[2]) < 1)) - FAIL; -}) + "operands[2] = find_constant_src (curr_insn);") (define_split [(set (match_operand 0 "any_fp_register_operand") @@ -3606,19 +3622,10 @@ "reload_completed && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == DFmode)" + || GET_MODE (operands[0]) == DFmode) + && ix86_standard_x87sse_constant_load_p (insn, operands[0])" [(set (match_dup 0) (match_dup 2))] -{ - operands[2] = find_constant_src (curr_insn); - - if (operands[2] == NULL_RTX - || (SSE_REGNO_P (REGNO (operands[0])) - && standard_sse_constant_p (operands[2], - GET_MODE (operands[0])) != 1) - || (STACK_REGNO_P (REGNO (operands[0])) - && standard_80387_constant_p (operands[2]) < 1)) - FAIL; -}) + "operands[2] = find_constant_src (curr_insn);") ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence (define_split @@ -3762,20 +3769,18 @@ "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") (define_split - [(set (match_operand:DI 0 "register_operand") - (zero_extend:DI (match_operand:SI 1 "register_operand")))] + [(set (match_operand:DI 0 "general_reg_operand") + (zero_extend:DI (match_operand:SI 1 "general_reg_operand")))] "!TARGET_64BIT && reload_completed - && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0])) - && true_regnum (operands[0]) == true_regnum (operands[1])" + && REGNO (operands[0]) == REGNO (operands[1])" [(set (match_dup 4) (const_int 0))] "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") (define_split - [(set (match_operand:DI 0 "nonimmediate_operand") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))] + [(set (match_operand:DI 0 "nonimmediate_gr_operand") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_gr_operand")))] "!TARGET_64BIT && reload_completed - && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))" + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" [(set (match_dup 3) (match_dup 1)) (set (match_dup 4) (const_int 0))] "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") @@ -3813,7 +3818,8 @@ [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] { - if (true_regnum (operands[0]) != true_regnum (operands[1])) + if (!REG_P (operands[1]) + || REGNO (operands[0]) != REGNO (operands[1])) { ix86_expand_clear (operands[0]); @@ -3860,7 +3866,8 @@ [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255))) (clobber (reg:CC FLAGS_REG))])] { - if (true_regnum (operands[0]) != true_regnum (operands[1])) + if (!REG_P (operands[1]) + || REGNO (operands[0]) != REGNO (operands[1])) { ix86_expand_clear (operands[0]); @@ -3973,8 +3980,8 @@ /* Generate a cltd if possible and doing so it profitable. */ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) - && true_regnum (operands[1]) == AX_REG - && true_regnum (operands[2]) == DX_REG) + && REGNO (operands[1]) == AX_REG + && REGNO (operands[2]) == DX_REG) { emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31))); } @@ -4015,8 +4022,8 @@ (set (match_operand:SI 3 "memory_operand") (match_dup 2))] "/* cltd is shorter than sarl $31, %eax */ !optimize_function_for_size_p (cfun) - && true_regnum (operands[1]) == AX_REG - && true_regnum (operands[2]) == DX_REG + && REGNO (operands[1]) == AX_REG + && REGNO (operands[2]) == DX_REG && peep2_reg_dead_p (2, operands[1]) && peep2_reg_dead_p (3, operands[2]) && !reg_mentioned_p (operands[2], operands[3])" @@ -4037,19 +4044,19 @@ { split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); - if (true_regnum (operands[3]) != true_regnum (operands[1])) + if (REGNO (operands[3]) != REGNO (operands[1])) emit_move_insn (operands[3], operands[1]); /* Generate a cltd if possible and doing so it profitable. */ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) - && true_regnum (operands[3]) == AX_REG - && true_regnum (operands[4]) == DX_REG) + && REGNO (operands[3]) == AX_REG + && REGNO (operands[4]) == DX_REG) { emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31))); DONE; } - if (true_regnum (operands[4]) != true_regnum (operands[1])) + if (REGNO (operands[4]) != REGNO (operands[1])) emit_move_insn (operands[4], operands[1]); emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31))); @@ -4188,15 +4195,15 @@ "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));") (define_expand "extendsfdf2" - [(set (match_operand:DF 0 "nonimmediate_operand") + [(set (match_operand:DF 0 "nonimm_ssenomem_operand") (float_extend:DF (match_operand:SF 1 "general_operand")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" { /* ??? Needed for compress_float_constant since all fp constants are TARGET_LEGITIMATE_CONSTANT_P. */ if (CONST_DOUBLE_P (operands[1])) { - if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387) + if ((!SSE_FLOAT_MODE_P (DFmode) || TARGET_MIX_SSE_I387) && standard_80387_constant_p (operands[1]) > 0) { operands[1] = simplify_const_unary_operation @@ -4216,12 +4223,12 @@ that might lead to ICE on 32bit target. The sequence unlikely combine anyway. */ (define_split - [(set (match_operand:DF 0 "register_operand") + [(set (match_operand:DF 0 "sse_reg_operand") (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] "TARGET_USE_VECTOR_FP_CONVERTS && optimize_insn_for_speed_p () - && reload_completed && SSE_REG_P (operands[0]) + && reload_completed && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 2) @@ -4238,13 +4245,11 @@ { /* If it is unsafe to overwrite upper half of source, we need to move to destination and unpack there. */ - if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER - || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4) - && true_regnum (operands[0]) != true_regnum (operands[1])) + if (REGNO (operands[0]) != REGNO (operands[1]) || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL)) { - rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0])); + rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode); emit_move_insn (tmp, operands[1]); } else @@ -4252,7 +4257,7 @@ /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow =v, v, then vbroadcastss will be only needed for AVX512F without AVX512VL. */ - if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3]))) + if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3]))) emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3], operands[3])); else @@ -4268,21 +4273,20 @@ ;; It's more profitable to split and then extend in the same register. (define_peephole2 - [(set (match_operand:DF 0 "register_operand") + [(set (match_operand:DF 0 "sse_reg_operand") (float_extend:DF (match_operand:SF 1 "memory_operand")))] "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS - && optimize_insn_for_speed_p () - && SSE_REG_P (operands[0])" + && optimize_insn_for_speed_p ()" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float_extend:DF (match_dup 2)))] - "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));") + "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);") -(define_insn "*extendsfdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,v") +(define_insn "*extendsfdf2" + [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v") (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" + "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" { switch (which_alternative) { @@ -4301,18 +4305,16 @@ (set_attr "prefix" "orig,orig,maybe_vex") (set_attr "mode" "SF,XF,DF") (set (attr "enabled") - (cond [(eq_attr "alternative" "0,1") - (symbol_ref "TARGET_MIX_SSE_I387") - ] - (symbol_ref "true")))]) - -(define_insn "*extendsfdf2_i387" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] - "TARGET_80387" - "* return output_387_reg_move (insn, operands);" - [(set_attr "type" "fmov") - (set_attr "mode" "SF,XF")]) + (if_then_else + (match_test ("SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH")) + (if_then_else + (eq_attr "alternative" "0,1") + (symbol_ref "TARGET_MIX_SSE_I387") + (symbol_ref "true")) + (if_then_else + (eq_attr "alternative" "0,1") + (symbol_ref "true") + (symbol_ref "false"))))]) (define_expand "extend<mode>xf2" [(set (match_operand:XF 0 "nonimmediate_operand") @@ -4355,9 +4357,9 @@ [(set (match_operand:SF 0 "nonimmediate_operand") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" { - if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) + if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) ; else if (flag_unsafe_math_optimizations) ; @@ -4377,12 +4379,12 @@ that might lead to ICE on 32bit target. The sequence unlikely combine anyway. */ (define_split - [(set (match_operand:SF 0 "register_operand") + [(set (match_operand:SF 0 "sse_reg_operand") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand")))] "TARGET_USE_VECTOR_FP_CONVERTS && optimize_insn_for_speed_p () - && reload_completed && SSE_REG_P (operands[0]) + && reload_completed && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 2) @@ -4400,9 +4402,7 @@ if (REG_P (operands[1])) { if (!TARGET_SSE3 - && true_regnum (operands[0]) != true_regnum (operands[1]) - && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER - || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8)) + && REGNO (operands[0]) != REGNO (operands[1])) { rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode); emit_move_insn (tmp, operands[1]); @@ -4419,15 +4419,14 @@ ;; It's more profitable to split and then extend in the same register. (define_peephole2 - [(set (match_operand:SF 0 "register_operand") + [(set (match_operand:SF 0 "sse_reg_operand") (float_truncate:SF (match_operand:DF 1 "memory_operand")))] "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS - && optimize_insn_for_speed_p () - && SSE_REG_P (operands[0])" + && optimize_insn_for_speed_p ()" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float_truncate:SF (match_dup 2)))] - "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));") + "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);") (define_expand "truncdfsf2_with_temp" [(parallel [(set (match_operand:SF 0) @@ -4440,7 +4439,7 @@ [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,v") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" "f ,vm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" + "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH" { switch (which_alternative) { @@ -4472,9 +4471,9 @@ (set_attr "mode" "SF")]) (define_insn "*truncdfsf_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,x ,?f,?x,?*r") + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,v ,?f,?v,?*r") (float_truncate:SF - (match_operand:DF 1 "nonimmediate_operand" "f ,xm,f ,f ,f"))) + (match_operand:DF 1 "nonimmediate_operand" "f ,vm,f ,f ,f"))) (clobber (match_operand:SF 2 "memory_operand" "=X,X ,m ,m ,m"))] "TARGET_MIX_SSE_I387" { @@ -4496,7 +4495,7 @@ (set_attr "mode" "SF")]) (define_insn "*truncdfsf_i387" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r") + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?v,?*r") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" "f ,f ,f ,f"))) (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))] @@ -4534,7 +4533,7 @@ "reload_completed" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] - "operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));") + "operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));") ;; Conversion from XFmode to {SF,DF}mode @@ -4558,7 +4557,7 @@ }) (define_insn "*truncxfsf2_mixed" - [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r") + [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?v,?*r") (float_truncate:SF (match_operand:XF 1 "register_operand" "f ,f ,f ,f"))) (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))] @@ -4572,7 +4571,7 @@ (set_attr "mode" "SF")]) (define_insn "*truncxfdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?x,?*r") + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?v,?*r") (float_truncate:DF (match_operand:XF 1 "register_operand" "f ,f ,f ,f"))) (clobber (match_operand:DF 2 "memory_operand" "=X,m ,m ,m"))] @@ -5140,11 +5139,11 @@ ;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs ;; alternative in sse2_loadld. (define_split - [(set (match_operand:MODEF 0 "register_operand") + [(set (match_operand:MODEF 0 "sse_reg_operand") (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed && SSE_REG_P (operands[0]) + "TARGET_USE_VECTOR_CONVERTS + && optimize_function_for_speed_p (cfun) + && reload_completed && (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" @@ -5163,83 +5162,83 @@ DONE; }) -;; Avoid partial SSE register dependency stalls +;; Avoid partial SSE register dependency stalls. This splitter should split +;; late in the pass sequence (after register rename pass), so allocated +;; registers won't change anymore + (define_split - [(set (match_operand:MODEF 0 "register_operand") + [(set (match_operand:MODEF 0 "sse_reg_operand") (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && TARGET_SSE_PARTIAL_REG_DEPENDENCY + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && optimize_function_for_speed_p (cfun) - && reload_completed && SSE_REG_P (operands[0]) + && epilogue_completed && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" - [(const_int 0)] + [(set (match_dup 0) + (vec_merge:<MODEF:ssevecmode> + (vec_duplicate:<MODEF:ssevecmode> + (float:MODEF + (match_dup 1))) + (match_dup 0) + (const_int 1)))] { const machine_mode vmode = <MODEF:ssevecmode>mode; - const machine_mode mode = <MODEF:MODE>mode; - rtx t, op0 = lowpart_subreg (vmode, operands[0], mode); - emit_move_insn (op0, CONST0_RTX (vmode)); - - t = gen_rtx_FLOAT (mode, operands[1]); - t = gen_rtx_VEC_DUPLICATE (vmode, t); - t = gen_rtx_VEC_MERGE (vmode, t, op0, const1_rtx); - emit_insn (gen_rtx_SET (op0, t)); - DONE; + operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode); + emit_move_insn (operands[0], CONST0_RTX (vmode)); }) -;; Break partial reg stall for cvtsd2ss. +;; Break partial reg stall for cvtsd2ss. This splitter should split +;; late in the pass sequence (after register rename pass), +;; so allocated registers won't change anymore. -(define_peephole2 - [(set (match_operand:SF 0 "register_operand") +(define_split + [(set (match_operand:SF 0 "sse_reg_operand") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && TARGET_SSE_PARTIAL_REG_DEPENDENCY + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && optimize_function_for_speed_p (cfun) - && SSE_REG_P (operands[0]) - && (!SSE_REG_P (operands[1]) + && epilogue_completed + && (!REG_P (operands[1]) || REGNO (operands[0]) != REGNO (operands[1])) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 0) (vec_merge:V4SF (vec_duplicate:V4SF - (float_truncate:V2SF + (float_truncate:SF (match_dup 1))) (match_dup 0) (const_int 1)))] { operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode); - operands[1] = lowpart_subreg (V2DFmode, operands[1], DFmode); emit_move_insn (operands[0], CONST0_RTX (V4SFmode)); }) -;; Break partial reg stall for cvtss2sd. +;; Break partial reg stall for cvtss2sd. This splitter should split +;; late in the pass sequence (after register rename pass), +;; so allocated registers won't change anymore. -(define_peephole2 - [(set (match_operand:DF 0 "register_operand") +(define_split + [(set (match_operand:DF 0 "sse_reg_operand") (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && TARGET_SSE_PARTIAL_REG_DEPENDENCY + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && optimize_function_for_speed_p (cfun) - && SSE_REG_P (operands[0]) - && (!SSE_REG_P (operands[1]) + && epilogue_completed + && (!REG_P (operands[1]) || REGNO (operands[0]) != REGNO (operands[1])) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 0) (vec_merge:V2DF - (float_extend:V2DF - (vec_select:V2SF - (match_dup 1) - (parallel [(const_int 0) (const_int 1)]))) - (match_dup 0) + (vec_duplicate:V2DF + (float_extend:DF + (match_dup 1))) + (match_dup 0) (const_int 1)))] { operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode); - operands[1] = lowpart_subreg (V4SFmode, operands[1], SFmode); emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); }) @@ -5284,7 +5283,7 @@ emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3], operands[4])); - operands[3] = gen_rtx_REG (DImode, REGNO (operands[3])); + operands[3] = gen_lowpart (DImode, operands[3]); }) (define_split @@ -5427,7 +5426,7 @@ (define_expand "add<mode>3" [(set (match_operand:SDWIM 0 "nonimmediate_operand") (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand") - (match_operand:SDWIM 2 "<general_operand>")))] + (match_operand:SDWIM 2 "<general_hilo_operand>")))] "" "ix86_expand_binary_operator (PLUS, <MODE>mode, operands); DONE;") @@ -5435,7 +5434,8 @@ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o") (plus:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0") - (match_operand:<DWI> 2 "<general_operand>" "ro<di>,r<di>"))) + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" + "ro<di>,r<di>"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)" "#" @@ -5615,7 +5615,6 @@ (const_string "*"))) (set_attr "mode" "HI,HI,HI,SI")]) -;; %%% Potential partial reg stall on alternatives 3 and 4. What to do? (define_insn "*addqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp") (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp") @@ -5623,7 +5622,7 @@ (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, QImode, operands)" { - bool widen = (which_alternative == 3 || which_alternative == 4); + bool widen = (get_attr_mode (insn) != MODE_QI); switch (get_attr_type (insn)) { @@ -5672,7 +5671,12 @@ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) (const_string "1") (const_string "*"))) - (set_attr "mode" "QI,QI,QI,SI,SI,SI")]) + (set_attr "mode" "QI,QI,QI,SI,SI,SI") + ;; Potential partial reg stall on alternatives 3 and 4. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "3,4") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) (define_insn "*addqi_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) @@ -5721,32 +5725,6 @@ (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])]) -;; Convert add to the lea pattern to avoid flags dependency. -(define_split - [(set (match_operand:SWI 0 "register_operand") - (plus:SWI (match_operand:SWI 1 "register_operand") - (match_operand:SWI 2 "<nonmemory_operand>"))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed && ix86_lea_for_add_ok (insn, operands)" - [(const_int 0)] -{ - machine_mode mode = <MODE>mode; - rtx pat; - - if (<MODE_SIZE> < GET_MODE_SIZE (SImode)) - { - mode = SImode; - operands[0] = gen_lowpart (mode, operands[0]); - operands[1] = gen_lowpart (mode, operands[1]); - operands[2] = gen_lowpart (mode, operands[2]); - } - - pat = gen_rtx_PLUS (mode, operands[1], operands[2]); - - emit_insn (gen_rtx_SET (operands[0], pat)); - DONE; -}) - ;; Split non destructive adds if we cannot use lea. (define_split [(set (match_operand:DI 0 "register_operand") @@ -5764,6 +5742,24 @@ ;; Convert add to the lea pattern to avoid flags dependency. (define_split + [(set (match_operand:SWI 0 "register_operand") + (plus:SWI (match_operand:SWI 1 "register_operand") + (match_operand:SWI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && ix86_lea_for_add_ok (insn, operands)" + [(set (match_dup 0) + (plus:<LEAMODE> (match_dup 1) (match_dup 2)))] +{ + if (<MODE>mode != <LEAMODE>mode) + { + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]); + operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]); + operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]); + } +}) + +;; Convert add to the lea pattern to avoid flags dependency. +(define_split [(set (match_operand:DI 0 "register_operand") (zero_extend:DI (plus:SI (match_operand:SI 1 "register_operand") @@ -6248,7 +6244,7 @@ [(set (match_operand:SWI12 0 "register_operand" "=r") (plus:SWI12 (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l") - (match_operand:SWI12 2 "const248_operand" "n")) + (match_operand 2 "const248_operand" "n")) (match_operand:SWI12 3 "nonmemory_operand" "ri")))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "#" @@ -6270,7 +6266,7 @@ (plus:SWI12 (plus:SWI12 (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l") - (match_operand:SWI12 2 "const248_operand" "n")) + (match_operand 2 "const248_operand" "n")) (match_operand:SWI12 3 "register_operand" "r")) (match_operand:SWI12 4 "immediate_operand" "i")))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" @@ -6296,8 +6292,8 @@ (any_or:SWI12 (ashift:SWI12 (match_operand:SWI12 1 "index_register_operand" "l") - (match_operand:SWI12 2 "const_0_to_3_operand" "n")) - (match_operand:SWI12 3 "const_int_operand" "n")))] + (match_operand 2 "const_0_to_3_operand" "n")) + (match_operand 3 "const_int_operand" "n")))] "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && ((unsigned HOST_WIDE_INT) INTVAL (operands[3]) < (HOST_WIDE_INT_1U << INTVAL (operands[2])))" @@ -6320,8 +6316,8 @@ (any_or:SWI48 (ashift:SWI48 (match_operand:SWI48 1 "index_register_operand" "l") - (match_operand:SWI48 2 "const_0_to_3_operand" "n")) - (match_operand:SWI48 3 "const_int_operand" "n")))] + (match_operand 2 "const_0_to_3_operand" "n")) + (match_operand 3 "const_int_operand" "n")))] "(unsigned HOST_WIDE_INT) INTVAL (operands[3]) < (HOST_WIDE_INT_1U << INTVAL (operands[2]))" "#" @@ -6339,7 +6335,7 @@ (define_expand "sub<mode>3" [(set (match_operand:SDWIM 0 "nonimmediate_operand") (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand") - (match_operand:SDWIM 2 "<general_operand>")))] + (match_operand:SDWIM 2 "<general_hilo_operand>")))] "" "ix86_expand_binary_operator (MINUS, <MODE>mode, operands); DONE;") @@ -6347,7 +6343,8 @@ [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o") (minus:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0,0") - (match_operand:<DWI> 2 "<general_operand>" "ro<di>,r<di>"))) + (match_operand:<DWI> 2 "x86_64_hilo_general_operand" + "ro<di>,r<di>"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" "#" @@ -7184,7 +7181,7 @@ (match_operand:DWIH 2 "nonimmediate_operand")))) (clobber (reg:CC FLAGS_REG))] "TARGET_BMI2 && reload_completed - && true_regnum (operands[1]) == DX_REG" + && REGNO (operands[1]) == DX_REG" [(parallel [(set (match_dup 3) (mult:DWIH (match_dup 1) (match_dup 2))) (set (match_dup 4) @@ -8230,7 +8227,6 @@ (const_string "*"))) (set_attr "mode" "HI,HI,SI,HI")]) -;; %%% Potential partial reg stall on alternative 2. What to do? (define_insn "*andqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,!k") (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") @@ -8253,7 +8249,12 @@ } } [(set_attr "type" "alu,alu,alu,msklog") - (set_attr "mode" "QI,QI,SI,HI")]) + (set_attr "mode" "QI,QI,SI,HI") + ;; Potential partial reg stall on alternative 2. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "2") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) (define_insn "*andqi_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) @@ -8329,7 +8330,8 @@ (match_operand:SWI248 2 "const_int_operand"))) (clobber (reg:CC FLAGS_REG))] "reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + && (!REG_P (operands[1]) + || REGNO (operands[0]) != REGNO (operands[1]))" [(const_int 0)] { HOST_WIDE_INT ival = INTVAL (operands[2]); @@ -8737,7 +8739,6 @@ [(set_attr "type" "alu,alu,msklog") (set_attr "mode" "HI")]) -;; %%% Potential partial reg stall on alternative 2. What to do? (define_insn "*<code>qi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r,!k") (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") @@ -8750,7 +8751,12 @@ <logic>{l}\t{%k2, %k0|%k0, %k2} k<logic>w\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "alu,alu,alu,msklog") - (set_attr "mode" "QI,QI,SI,HI")]) + (set_attr "mode" "QI,QI,SI,HI") + ;; Potential partial reg stall on alternative 2. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "2") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*<code>si_1_zext" @@ -9235,27 +9241,33 @@ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;") -(define_insn "*absneg<mode>2_mixed" - [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r") +(define_insn "*absneg<mode>2" + [(set (match_operand:MODEF 0 "register_operand" "=Yv,Yv,f,!r") (match_operator:MODEF 3 "absneg_operator" - [(match_operand:MODEF 1 "register_operand" "0,x,0,0")])) - (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X")) + [(match_operand:MODEF 1 "register_operand" "0,Yv,0,0")])) + (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "Yvm,0,X,X")) (clobber (reg:CC FLAGS_REG))] - "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" + "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "#" [(set (attr "enabled") - (cond [(eq_attr "alternative" "2") - (symbol_ref "TARGET_MIX_SSE_I387") - ] - (symbol_ref "true")))]) + (if_then_else + (match_test ("SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH")) + (if_then_else + (eq_attr "alternative" "2") + (symbol_ref "TARGET_MIX_SSE_I387") + (symbol_ref "true")) + (if_then_else + (eq_attr "alternative" "2,3") + (symbol_ref "true") + (symbol_ref "false"))))]) -(define_insn "*absneg<mode>2_i387" - [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r") - (match_operator:X87MODEF 3 "absneg_operator" - [(match_operand:X87MODEF 1 "register_operand" "0,0")])) +(define_insn "*absnegxf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f,!r") + (match_operator:XF 3 "absneg_operator" + [(match_operand:XF 1 "register_operand" "0,0")])) (use (match_operand 2)) (clobber (reg:CC FLAGS_REG))] - "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" + "TARGET_80387" "#") (define_expand "<code>tf2" @@ -9265,10 +9277,10 @@ "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;") (define_insn "*absnegtf2_sse" - [(set (match_operand:TF 0 "register_operand" "=x,x") + [(set (match_operand:TF 0 "register_operand" "=Yv,Yv") (match_operator:TF 3 "absneg_operator" - [(match_operand:TF 1 "register_operand" "0,x")])) - (use (match_operand:TF 2 "nonimmediate_operand" "xm,0")) + [(match_operand:TF 1 "register_operand" "0,Yv")])) + (use (match_operand:TF 2 "nonimmediate_operand" "Yvm,0")) (clobber (reg:CC FLAGS_REG))] "TARGET_SSE" "#") @@ -9284,12 +9296,12 @@ [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))]) (define_split - [(set (match_operand 0 "register_operand") + [(set (match_operand 0 "sse_reg_operand") (match_operator 3 "absneg_operator" [(match_operand 1 "register_operand")])) (use (match_operand 2 "nonimmediate_operand")) (clobber (reg:CC FLAGS_REG))] - "reload_completed && SSE_REG_P (operands[0])" + "reload_completed" [(set (match_dup 0) (match_dup 3))] { machine_mode mode = GET_MODE (operands[0]); @@ -9308,7 +9320,7 @@ }) (define_split - [(set (match_operand:SF 0 "register_operand") + [(set (match_operand:SF 0 "general_reg_operand") (match_operator:SF 1 "absneg_operator" [(match_dup 0)])) (use (match_operand:V4SF 2)) (clobber (reg:CC FLAGS_REG))] @@ -9332,7 +9344,7 @@ }) (define_split - [(set (match_operand:DF 0 "register_operand") + [(set (match_operand:DF 0 "general_reg_operand") (match_operator:DF 1 "absneg_operator" [(match_dup 0)])) (use (match_operand 2)) (clobber (reg:CC FLAGS_REG))] @@ -9370,7 +9382,7 @@ }) (define_split - [(set (match_operand:XF 0 "register_operand") + [(set (match_operand:XF 0 "general_reg_operand") (match_operator:XF 1 "absneg_operator" [(match_dup 0)])) (use (match_operand 2)) (clobber (reg:CC FLAGS_REG))] @@ -9380,8 +9392,7 @@ { rtx tmp; operands[0] = gen_rtx_REG (SImode, - true_regnum (operands[0]) - + (TARGET_64BIT ? 1 : 2)); + REGNO (operands[0]) + (TARGET_64BIT ? 1 : 2)); if (GET_CODE (operands[1]) == ABS) { tmp = GEN_INT (0x7fff); @@ -9449,11 +9460,11 @@ "ix86_expand_copysign (operands); DONE;") (define_insn_and_split "copysign<mode>3_const" - [(set (match_operand:CSGNMODE 0 "register_operand" "=x") + [(set (match_operand:CSGNMODE 0 "register_operand" "=Yv") (unspec:CSGNMODE - [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC") + [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "YvmC") (match_operand:CSGNMODE 2 "register_operand" "0") - (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")] + (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "Yvm")] UNSPEC_COPYSIGN))] "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || (TARGET_SSE && (<MODE>mode == TFmode))" @@ -9463,14 +9474,16 @@ "ix86_split_copysign_const (operands); DONE;") (define_insn "copysign<mode>3_var" - [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x") + [(set (match_operand:CSGNMODE 0 "register_operand" "=Yv,Yv,Yv,Yv,Yv") (unspec:CSGNMODE - [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x") - (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x") - (match_operand:<CSGNVMODE> 4 "nonimmediate_operand" "X,xm,xm,0,0") - (match_operand:<CSGNVMODE> 5 "nonimmediate_operand" "0,xm,1,xm,1")] + [(match_operand:CSGNMODE 2 "register_operand" "Yv,0,0,Yv,Yv") + (match_operand:CSGNMODE 3 "register_operand" "1,1,Yv,1,Yv") + (match_operand:<CSGNVMODE> 4 + "nonimmediate_operand" "X,Yvm,Yvm,0,0") + (match_operand:<CSGNVMODE> 5 + "nonimmediate_operand" "0,Yvm,1,Yvm,1")] UNSPEC_COPYSIGN)) - (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))] + (clobber (match_scratch:<CSGNVMODE> 1 "=Yv,Yv,Yv,Yv,Yv"))] "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || (TARGET_SSE && (<MODE>mode == TFmode))" "#") @@ -9522,7 +9535,6 @@ (set_attr "prefix" "*,vex") (set_attr "mode" "HI")]) -;; %%% Potential partial reg stall on alternative 1. What to do? (define_insn "*one_cmplqi2_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!k") (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))] @@ -9545,7 +9557,12 @@ [(set_attr "isa" "*,*,avx512f") (set_attr "type" "negnot,negnot,msklog") (set_attr "prefix" "*,*,vex") - (set_attr "mode" "QI,SI,QI")]) + (set_attr "mode" "QI,SI,QI") + ;; Potential partial reg stall on alternative 1. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) ;; ??? Currently never generated - xor is used instead. (define_insn "*one_cmplsi2_1_zext" @@ -9964,7 +9981,6 @@ (const_string "*"))) (set_attr "mode" "HI,SI")]) -;; %%% Potential partial reg stall on alternative 1. What to do? (define_insn "*ashlqi3_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp") (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l") @@ -10020,7 +10036,12 @@ (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "QI,SI,SI")]) + (set_attr "mode" "QI,SI,SI") + ;; Potential partial reg stall on alternative 1. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) (define_insn "*ashlqi3_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) @@ -10067,31 +10088,21 @@ ;; Convert ashift to the lea pattern to avoid flags dependency. (define_split - [(set (match_operand 0 "register_operand") - (ashift (match_operand 1 "index_register_operand") - (match_operand:QI 2 "const_int_operand"))) + [(set (match_operand:SWI 0 "register_operand") + (ashift:SWI (match_operand:SWI 1 "index_register_operand") + (match_operand 2 "const_0_to_3_operand"))) (clobber (reg:CC FLAGS_REG))] - "GET_MODE (operands[0]) == GET_MODE (operands[1]) - && reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" - [(const_int 0)] + "reload_completed + && REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_dup 0) + (mult:<LEAMODE> (match_dup 1) (match_dup 2)))] { - machine_mode mode = GET_MODE (operands[0]); - rtx pat; - - if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)) - { - mode = SImode; - operands[0] = gen_lowpart (mode, operands[0]); - operands[1] = gen_lowpart (mode, operands[1]); + if (<MODE>mode != <LEAMODE>mode) + { + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]); + operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]); } - - operands[2] = gen_int_mode (1 << INTVAL (operands[2]), mode); - - pat = gen_rtx_MULT (mode, operands[1], operands[2]); - - emit_insn (gen_rtx_SET (operands[0], pat)); - DONE; + operands[2] = GEN_INT (1 << INTVAL (operands[2])); }) ;; Convert ashift to the lea pattern to avoid flags dependency. @@ -10099,15 +10110,15 @@ [(set (match_operand:DI 0 "register_operand") (zero_extend:DI (ashift:SI (match_operand:SI 1 "index_register_operand") - (match_operand:QI 2 "const_int_operand")))) + (match_operand 2 "const_0_to_3_operand")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + && REGNO (operands[0]) != REGNO (operands[1])" [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))] { operands[1] = gen_lowpart (SImode, operands[1]); - operands[2] = gen_int_mode (1 << INTVAL (operands[2]), SImode); + operands[2] = GEN_INT (1 << INTVAL (operands[2])); }) ;; This pattern can't accept a variable shift count, since shifts by @@ -11054,20 +11065,19 @@ (const_int 1)) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && !TARGET_USE_BT" - [(const_int 0)] + [(parallel [(set (match_dup 0) + (ior:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] { int i = INTVAL (operands[1]); - rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); + operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); - if (i >= 31) + if (!x86_64_immediate_operand (operands[3], DImode)) { - emit_move_insn (operands[2], op1); - op1 = operands[2]; + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; } - - emit_insn (gen_iordi3 (operands[0], operands[0], op1)); - DONE; }) (define_peephole2 @@ -11079,20 +11089,19 @@ (const_int 0)) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && !TARGET_USE_BT" - [(const_int 0)] + [(parallel [(set (match_dup 0) + (and:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] { int i = INTVAL (operands[1]); - rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); + operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode); - if (i >= 32) + if (!x86_64_immediate_operand (operands[3], DImode)) { - emit_move_insn (operands[2], op1); - op1 = operands[2]; + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; } - - emit_insn (gen_anddi3 (operands[0], operands[0], op1)); - DONE; }) (define_peephole2 @@ -11105,20 +11114,19 @@ (match_dup 0) (const_int 1) (match_dup 1)))) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && !TARGET_USE_BT" - [(const_int 0)] + [(parallel [(set (match_dup 0) + (xor:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] { int i = INTVAL (operands[1]); - rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); + operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); - if (i >= 31) + if (!x86_64_immediate_operand (operands[3], DImode)) { - emit_move_insn (operands[2], op1); - op1 = operands[2]; + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; } - - emit_insn (gen_xordi3 (operands[0], operands[0], op1)); - DONE; }) (define_insn "*bt<mode>" @@ -11942,7 +11950,10 @@ (match_operand:SI 0 "GOT_memory_operand" "Bg"))) (match_operand 1))] "TARGET_X32" - "* return ix86_output_call_insn (insn, operands[0]);" +{ + rtx fnaddr = gen_const_mem (DImode, XEXP (operands[0], 0)); + return ix86_output_call_insn (insn, fnaddr); +} [(set_attr "type" "call")]) ;; Since sibcall never returns, we can only use call-clobbered register @@ -11955,8 +11966,8 @@ (match_operand 2))] "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)" { - rtx fnaddr = gen_rtx_PLUS (Pmode, operands[0], operands[1]); - fnaddr = gen_const_mem (Pmode, fnaddr); + rtx fnaddr = gen_rtx_PLUS (SImode, operands[0], operands[1]); + fnaddr = gen_const_mem (SImode, fnaddr); return ix86_output_call_insn (insn, fnaddr); } [(set_attr "type" "call")]) @@ -12135,7 +12146,10 @@ (match_operand:SI 1 "GOT_memory_operand" "Bg"))) (match_operand 2)))] "TARGET_X32" - "* return ix86_output_call_insn (insn, operands[1]);" +{ + rtx fnaddr = gen_const_mem (DImode, XEXP (operands[1], 0)); + return ix86_output_call_insn (insn, fnaddr); +} [(set_attr "type" "callv")]) ;; Since sibcall never returns, we can only use call-clobbered register @@ -12149,8 +12163,8 @@ (match_operand 3)))] "!TARGET_MACHO && !TARGET_64BIT && SIBLING_CALL_P (insn)" { - rtx fnaddr = gen_rtx_PLUS (Pmode, operands[1], operands[2]); - fnaddr = gen_const_mem (Pmode, fnaddr); + rtx fnaddr = gen_rtx_PLUS (SImode, operands[1], operands[2]); + fnaddr = gen_const_mem (SImode, fnaddr); return ix86_output_call_insn (insn, fnaddr); } [(set_attr "type" "callv")]) @@ -12513,7 +12527,7 @@ (define_expand "set_got" [(parallel - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand") (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) (clobber (reg:CC FLAGS_REG))])] "!TARGET_64BIT" @@ -12533,7 +12547,7 @@ (define_expand "set_got_labelled" [(parallel - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand") (unspec:SI [(label_ref (match_operand 1))] UNSPEC_SET_GOT)) (clobber (reg:CC FLAGS_REG))])] @@ -14039,9 +14053,9 @@ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v") (match_operator:MODEF 3 "binary_fp_operator" [(match_operand:MODEF 1 - "nonimm_ssenomem_operand" "0,fm,0,v") + "x87nonimm_ssenomem_operand" "0,fm,0,v") (match_operand:MODEF 2 - "nonimmediate_operand" "fm,0,xm,vm")]))] + "nonimmediate_operand" "fm,0,xm,vm")]))] "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))) && !COMMUTATIVE_ARITH_P (operands[3]) @@ -17215,7 +17229,7 @@ (set_attr "mode" "DF,DF,DI,DI,DI,DI")]) (define_split - [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand") + [(set (match_operand:DF 0 "general_reg_operand") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) (match_operand:DF 2 "nonimmediate_operand") @@ -17271,7 +17285,7 @@ ;; Don't do conditional moves with memory inputs (define_peephole2 [(match_scratch:MODEF 4 "r") - (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand") + (set (match_operand:MODEF 0 "general_reg_operand") (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) (match_operand:MODEF 2 "nonimmediate_operand") @@ -17821,7 +17835,7 @@ (match_operand:SI 3 "immediate_operand")) (const_int 0)]))] "ix86_match_ccmode (insn, CCNOmode) - && (true_regnum (operands[2]) != AX_REG + && (REGNO (operands[2]) != AX_REG || satisfies_constraint_K (operands[3])) && peep2_reg_dead_p (1, operands[2])" [(parallel @@ -17842,7 +17856,7 @@ (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[2]) != AX_REG + && REGNO (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" [(parallel [(set (match_dup 0) @@ -17863,7 +17877,7 @@ (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[2]) != AX_REG + && REGNO (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" [(parallel [(set (match_dup 0) (match_op_dup 1 @@ -19032,7 +19046,7 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_expand "lwp_llwpcb" - [(unspec_volatile [(match_operand 0 "register_operand" "r")] + [(unspec_volatile [(match_operand 0 "register_operand")] UNSPECV_LLWP_INTRINSIC)] "TARGET_LWP") @@ -19046,7 +19060,7 @@ (set_attr "length" "5")]) (define_expand "lwp_slwpcb" - [(set (match_operand 0 "register_operand" "=r") + [(set (match_operand 0 "register_operand") (unspec_volatile [(const_int 0)] UNSPECV_SLWP_INTRINSIC))] "TARGET_LWP" { @@ -19070,9 +19084,9 @@ (set_attr "length" "5")]) (define_expand "lwp_lwpval<mode>3" - [(unspec_volatile [(match_operand:SWI48 1 "register_operand" "r") - (match_operand:SI 2 "nonimmediate_operand" "rm") - (match_operand:SI 3 "const_int_operand" "i")] + [(unspec_volatile [(match_operand:SWI48 1 "register_operand") + (match_operand:SI 2 "nonimmediate_operand") + (match_operand:SI 3 "const_int_operand")] UNSPECV_LWPVAL_INTRINSIC)] "TARGET_LWP" ;; Avoid unused variable warning. @@ -19092,11 +19106,11 @@ (define_expand "lwp_lwpins<mode>3" [(set (reg:CCC FLAGS_REG) - (unspec_volatile:CCC [(match_operand:SWI48 1 "register_operand" "r") - (match_operand:SI 2 "nonimmediate_operand" "rm") - (match_operand:SI 3 "const_int_operand" "i")] + (unspec_volatile:CCC [(match_operand:SWI48 1 "register_operand") + (match_operand:SI 2 "nonimmediate_operand") + (match_operand:SI 3 "const_int_operand")] UNSPECV_LWPINS_INTRINSIC)) - (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (set (match_operand:QI 0 "nonimmediate_operand") (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))] "TARGET_LWP") |