diff options
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- | gcc/config/i386/i386.md | 494 |
1 file changed, 231 insertions, 263 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index be7cfbfd64e..9b5407aa697 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1024,6 +1024,9 @@ (define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")]) (define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti")]) +;; LEA mode corresponding to an integer mode +(define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")]) + ;; Half mode for double word integer modes. (define_mode_iterator DWIH [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")]) @@ -2696,34 +2699,31 @@ (set_attr "amdfam10_decode" "double") (set_attr "bdver1_decode" "double")]) -(define_insn "*swap<mode>_1" - [(set (match_operand:SWI12 0 "register_operand" "+r") - (match_operand:SWI12 1 "register_operand" "+r")) +(define_insn "*swap<mode>" + [(set (match_operand:SWI12 0 "register_operand" "+<r>,r") + (match_operand:SWI12 1 "register_operand" "+<r>,r")) (set (match_dup 1) (match_dup 0))] - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" - "xchg{l}\t%k1, %k0" + "" + "@ + xchg{<imodesuffix>}\t%1, %0 + xchg{l}\t%k1, %k0" [(set_attr "type" "imov") - (set_attr "mode" "SI") + (set_attr "mode" "<MODE>,SI") + (set (attr "preferred_for_size") + (cond [(eq_attr "alternative" "0") + (symbol_ref "false")] + (symbol_ref "true"))) + ;; Potential partial reg stall on alternative 1. 
+ (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true"))) (set_attr "pent_pair" "np") (set_attr "athlon_decode" "vector") (set_attr "amdfam10_decode" "double") (set_attr "bdver1_decode" "double")]) -;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL -;; is disabled for AMDFAM10 -(define_insn "*swap<mode>_2" - [(set (match_operand:SWI12 0 "register_operand" "+<r>") - (match_operand:SWI12 1 "register_operand" "+<r>")) - (set (match_dup 1) - (match_dup 0))] - "TARGET_PARTIAL_REG_STALL" - "xchg{<imodesuffix>}\t%1, %0" - [(set_attr "type" "imov") - (set_attr "mode" "<MODE>") - (set_attr "pent_pair" "np") - (set_attr "athlon_decode" "vector")]) - (define_expand "movstrict<mode>" [(set (strict_low_part (match_operand:SWI12 0 "nonimmediate_operand")) (match_operand:SWI12 1 "general_operand"))] @@ -3072,14 +3072,10 @@ (define_split [(set (match_operand:SF 0 "push_operand") (match_operand:SF 1 "memory_operand"))] - "reload_completed" + "reload_completed + && find_constant_src (insn)" [(set (match_dup 0) (match_dup 2))] -{ - operands[2] = find_constant_src (curr_insn); - - if (operands[2] == NULL_RTX) - FAIL; -}) + "operands[2] = find_constant_src (curr_insn);") (define_split [(set (match_operand 0 "push_operand") @@ -3601,19 +3597,10 @@ && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode || GET_MODE (operands[0]) == DFmode - || GET_MODE (operands[0]) == SFmode)" + || GET_MODE (operands[0]) == SFmode) + && ix86_standard_x87sse_constant_load_p (insn, operands[0])" [(set (match_dup 0) (match_dup 2))] -{ - operands[2] = find_constant_src (curr_insn); - - if (operands[2] == NULL_RTX - || (SSE_REGNO_P (REGNO (operands[0])) - && standard_sse_constant_p (operands[2], - GET_MODE (operands[0])) != 1) - || (STACK_REGNO_P (REGNO (operands[0])) - && standard_80387_constant_p (operands[2]) < 1)) - FAIL; -}) + "operands[2] = find_constant_src (curr_insn);") (define_split [(set 
(match_operand 0 "any_fp_register_operand") @@ -3621,19 +3608,10 @@ "reload_completed && (GET_MODE (operands[0]) == TFmode || GET_MODE (operands[0]) == XFmode - || GET_MODE (operands[0]) == DFmode)" + || GET_MODE (operands[0]) == DFmode) + && ix86_standard_x87sse_constant_load_p (insn, operands[0])" [(set (match_dup 0) (match_dup 2))] -{ - operands[2] = find_constant_src (curr_insn); - - if (operands[2] == NULL_RTX - || (SSE_REGNO_P (REGNO (operands[0])) - && standard_sse_constant_p (operands[2], - GET_MODE (operands[0])) != 1) - || (STACK_REGNO_P (REGNO (operands[0])) - && standard_80387_constant_p (operands[2]) < 1)) - FAIL; -}) + "operands[2] = find_constant_src (curr_insn);") ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence (define_split @@ -3777,20 +3755,18 @@ "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") (define_split - [(set (match_operand:DI 0 "register_operand") - (zero_extend:DI (match_operand:SI 1 "register_operand")))] + [(set (match_operand:DI 0 "general_reg_operand") + (zero_extend:DI (match_operand:SI 1 "general_reg_operand")))] "!TARGET_64BIT && reload_completed - && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0])) - && true_regnum (operands[0]) == true_regnum (operands[1])" + && REGNO (operands[0]) == REGNO (operands[1])" [(set (match_dup 4) (const_int 0))] "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") (define_split - [(set (match_operand:DI 0 "nonimmediate_operand") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))] + [(set (match_operand:DI 0 "nonimmediate_gr_operand") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_gr_operand")))] "!TARGET_64BIT && reload_completed - && !(MEM_P (operands[0]) && MEM_P (operands[1])) - && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))" + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" [(set (match_dup 3) (match_dup 1)) (set (match_dup 4) (const_int 0))] "split_double_mode (DImode, &operands[0], 1, 
&operands[3], &operands[4]);") @@ -3828,7 +3804,8 @@ [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] { - if (true_regnum (operands[0]) != true_regnum (operands[1])) + if (!REG_P (operands[1]) + || REGNO (operands[0]) != REGNO (operands[1])) { ix86_expand_clear (operands[0]); @@ -3875,7 +3852,8 @@ [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255))) (clobber (reg:CC FLAGS_REG))])] { - if (true_regnum (operands[0]) != true_regnum (operands[1])) + if (!REG_P (operands[1]) + || REGNO (operands[0]) != REGNO (operands[1])) { ix86_expand_clear (operands[0]); @@ -3988,8 +3966,8 @@ /* Generate a cltd if possible and doing so it profitable. */ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) - && true_regnum (operands[1]) == AX_REG - && true_regnum (operands[2]) == DX_REG) + && REGNO (operands[1]) == AX_REG + && REGNO (operands[2]) == DX_REG) { emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31))); } @@ -4030,8 +4008,8 @@ (set (match_operand:SI 3 "memory_operand") (match_dup 2))] "/* cltd is shorter than sarl $31, %eax */ !optimize_function_for_size_p (cfun) - && true_regnum (operands[1]) == AX_REG - && true_regnum (operands[2]) == DX_REG + && REGNO (operands[1]) == AX_REG + && REGNO (operands[2]) == DX_REG && peep2_reg_dead_p (2, operands[1]) && peep2_reg_dead_p (3, operands[2]) && !reg_mentioned_p (operands[2], operands[3])" @@ -4052,19 +4030,19 @@ { split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); - if (true_regnum (operands[3]) != true_regnum (operands[1])) + if (REGNO (operands[3]) != REGNO (operands[1])) emit_move_insn (operands[3], operands[1]); /* Generate a cltd if possible and doing so it profitable. 
*/ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) - && true_regnum (operands[3]) == AX_REG - && true_regnum (operands[4]) == DX_REG) + && REGNO (operands[3]) == AX_REG + && REGNO (operands[4]) == DX_REG) { emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31))); DONE; } - if (true_regnum (operands[4]) != true_regnum (operands[1])) + if (REGNO (operands[4]) != REGNO (operands[1])) emit_move_insn (operands[4], operands[1]); emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31))); @@ -4203,15 +4181,15 @@ "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));") (define_expand "extendsfdf2" - [(set (match_operand:DF 0 "nonimmediate_operand") + [(set (match_operand:DF 0 "nonimm_ssenomem_operand") (float_extend:DF (match_operand:SF 1 "general_operand")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" { /* ??? Needed for compress_float_constant since all fp constants are TARGET_LEGITIMATE_CONSTANT_P. */ if (CONST_DOUBLE_P (operands[1])) { - if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387) + if ((!SSE_FLOAT_MODE_P (DFmode) || TARGET_MIX_SSE_I387) && standard_80387_constant_p (operands[1]) > 0) { operands[1] = simplify_const_unary_operation @@ -4231,12 +4209,12 @@ that might lead to ICE on 32bit target. The sequence unlikely combine anyway. */ (define_split - [(set (match_operand:DF 0 "register_operand") + [(set (match_operand:DF 0 "sse_reg_operand") (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] "TARGET_USE_VECTOR_FP_CONVERTS && optimize_insn_for_speed_p () - && reload_completed && SSE_REG_P (operands[0]) + && reload_completed && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 2) @@ -4253,13 +4231,11 @@ { /* If it is unsafe to overwrite upper half of source, we need to move to destination and unpack there. 
*/ - if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER - || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4) - && true_regnum (operands[0]) != true_regnum (operands[1])) + if (REGNO (operands[0]) != REGNO (operands[1]) || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL)) { - rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0])); + rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode); emit_move_insn (tmp, operands[1]); } else @@ -4267,7 +4243,7 @@ /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow =v, v, then vbroadcastss will be only needed for AVX512F without AVX512VL. */ - if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3]))) + if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3]))) emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3], operands[3])); else @@ -4283,21 +4259,20 @@ ;; It's more profitable to split and then extend in the same register. (define_peephole2 - [(set (match_operand:DF 0 "register_operand") + [(set (match_operand:DF 0 "sse_reg_operand") (float_extend:DF (match_operand:SF 1 "memory_operand")))] "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS - && optimize_insn_for_speed_p () - && SSE_REG_P (operands[0])" + && optimize_insn_for_speed_p ()" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float_extend:DF (match_dup 2)))] - "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));") + "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);") -(define_insn "*extendsfdf2_mixed" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,v") +(define_insn "*extendsfdf2" + [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v") (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" + "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" { switch (which_alternative) { @@ -4316,18 +4291,16 @@ (set_attr "prefix" "orig,orig,maybe_vex") (set_attr "mode" "SF,XF,DF") (set (attr "enabled") - (cond [(eq_attr "alternative" "0,1") - (symbol_ref 
"TARGET_MIX_SSE_I387") - ] - (symbol_ref "true")))]) - -(define_insn "*extendsfdf2_i387" - [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m") - (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))] - "TARGET_80387" - "* return output_387_reg_move (insn, operands);" - [(set_attr "type" "fmov") - (set_attr "mode" "SF,XF")]) + (if_then_else + (match_test ("SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH")) + (if_then_else + (eq_attr "alternative" "0,1") + (symbol_ref "TARGET_MIX_SSE_I387") + (symbol_ref "true")) + (if_then_else + (eq_attr "alternative" "0,1") + (symbol_ref "true") + (symbol_ref "false"))))]) (define_expand "extend<mode>xf2" [(set (match_operand:XF 0 "nonimmediate_operand") @@ -4370,9 +4343,9 @@ [(set (match_operand:SF 0 "nonimmediate_operand") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand")))] - "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)" { - if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) + if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387) ; else if (flag_unsafe_math_optimizations) ; @@ -4392,12 +4365,12 @@ that might lead to ICE on 32bit target. The sequence unlikely combine anyway. 
*/ (define_split - [(set (match_operand:SF 0 "register_operand") + [(set (match_operand:SF 0 "sse_reg_operand") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand")))] "TARGET_USE_VECTOR_FP_CONVERTS && optimize_insn_for_speed_p () - && reload_completed && SSE_REG_P (operands[0]) + && reload_completed && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 2) @@ -4415,9 +4388,7 @@ if (REG_P (operands[1])) { if (!TARGET_SSE3 - && true_regnum (operands[0]) != true_regnum (operands[1]) - && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER - || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8)) + && REGNO (operands[0]) != REGNO (operands[1])) { rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode); emit_move_insn (tmp, operands[1]); @@ -4434,15 +4405,14 @@ ;; It's more profitable to split and then extend in the same register. (define_peephole2 - [(set (match_operand:SF 0 "register_operand") + [(set (match_operand:SF 0 "sse_reg_operand") (float_truncate:SF (match_operand:DF 1 "memory_operand")))] "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS - && optimize_insn_for_speed_p () - && SSE_REG_P (operands[0])" + && optimize_insn_for_speed_p ()" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float_truncate:SF (match_dup 2)))] - "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));") + "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);") (define_expand "truncdfsf2_with_temp" [(parallel [(set (match_operand:SF 0) @@ -4455,7 +4425,7 @@ [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,v") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand" "f ,vm")))] - "TARGET_SSE2 && TARGET_SSE_MATH" + "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH" { switch (which_alternative) { @@ -4549,7 +4519,7 @@ "reload_completed" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))] - "operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));") + "operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));") ;; 
Conversion from XFmode to {SF,DF}mode @@ -5155,11 +5125,11 @@ ;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs ;; alternative in sse2_loadld. (define_split - [(set (match_operand:MODEF 0 "register_operand") + [(set (match_operand:MODEF 0 "sse_reg_operand") (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) - && reload_completed && SSE_REG_P (operands[0]) + "TARGET_USE_VECTOR_CONVERTS + && optimize_function_for_speed_p (cfun) + && reload_completed && (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" @@ -5178,83 +5148,83 @@ DONE; }) -;; Avoid partial SSE register dependency stalls +;; Avoid partial SSE register dependency stalls. This splitter should split +;; late in the pass sequence (after register rename pass), so allocated +;; registers won't change anymore + (define_split - [(set (match_operand:MODEF 0 "register_operand") + [(set (match_operand:MODEF 0 "sse_reg_operand") (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && TARGET_SSE_PARTIAL_REG_DEPENDENCY + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && optimize_function_for_speed_p (cfun) - && reload_completed && SSE_REG_P (operands[0]) + && epilogue_completed && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" - [(const_int 0)] + [(set (match_dup 0) + (vec_merge:<MODEF:ssevecmode> + (vec_duplicate:<MODEF:ssevecmode> + (float:MODEF + (match_dup 1))) + (match_dup 0) + (const_int 1)))] { const machine_mode vmode = <MODEF:ssevecmode>mode; - const machine_mode mode = <MODEF:MODE>mode; - rtx t, op0 = lowpart_subreg (vmode, operands[0], mode); - - emit_move_insn (op0, CONST0_RTX (vmode)); - t = gen_rtx_FLOAT (mode, operands[1]); - t = gen_rtx_VEC_DUPLICATE (vmode, t); - t = gen_rtx_VEC_MERGE (vmode, t, op0, const1_rtx); - emit_insn (gen_rtx_SET (op0, t)); - DONE; + 
operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode); + emit_move_insn (operands[0], CONST0_RTX (vmode)); }) -;; Break partial reg stall for cvtsd2ss. +;; Break partial reg stall for cvtsd2ss. This splitter should split +;; late in the pass sequence (after register rename pass), +;; so allocated registers won't change anymore. -(define_peephole2 - [(set (match_operand:SF 0 "register_operand") +(define_split + [(set (match_operand:SF 0 "sse_reg_operand") (float_truncate:SF (match_operand:DF 1 "nonimmediate_operand")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && TARGET_SSE_PARTIAL_REG_DEPENDENCY + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && optimize_function_for_speed_p (cfun) - && SSE_REG_P (operands[0]) - && (!SSE_REG_P (operands[1]) + && epilogue_completed + && (!REG_P (operands[1]) || REGNO (operands[0]) != REGNO (operands[1])) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 0) (vec_merge:V4SF (vec_duplicate:V4SF - (float_truncate:V2SF + (float_truncate:SF (match_dup 1))) (match_dup 0) (const_int 1)))] { operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode); - operands[1] = lowpart_subreg (V2DFmode, operands[1], DFmode); emit_move_insn (operands[0], CONST0_RTX (V4SFmode)); }) -;; Break partial reg stall for cvtss2sd. +;; Break partial reg stall for cvtss2sd. This splitter should split +;; late in the pass sequence (after register rename pass), +;; so allocated registers won't change anymore. 
-(define_peephole2 - [(set (match_operand:DF 0 "register_operand") +(define_split + [(set (match_operand:DF 0 "sse_reg_operand") (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] - "TARGET_SSE2 && TARGET_SSE_MATH - && TARGET_SSE_PARTIAL_REG_DEPENDENCY + "TARGET_SSE_PARTIAL_REG_DEPENDENCY && optimize_function_for_speed_p (cfun) - && SSE_REG_P (operands[0]) - && (!SSE_REG_P (operands[1]) + && epilogue_completed + && (!REG_P (operands[1]) || REGNO (operands[0]) != REGNO (operands[1])) && (!EXT_REX_SSE_REG_P (operands[0]) || TARGET_AVX512VL)" [(set (match_dup 0) (vec_merge:V2DF - (float_extend:V2DF - (vec_select:V2SF - (match_dup 1) - (parallel [(const_int 0) (const_int 1)]))) - (match_dup 0) + (vec_duplicate:V2DF + (float_extend:DF + (match_dup 1))) + (match_dup 0) (const_int 1)))] { operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode); - operands[1] = lowpart_subreg (V4SFmode, operands[1], SFmode); emit_move_insn (operands[0], CONST0_RTX (V2DFmode)); }) @@ -5299,7 +5269,7 @@ emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3], operands[4])); - operands[3] = gen_rtx_REG (DImode, REGNO (operands[3])); + operands[3] = gen_lowpart (DImode, operands[3]); }) (define_split @@ -5631,7 +5601,6 @@ (const_string "*"))) (set_attr "mode" "HI,HI,HI,SI")]) -;; %%% Potential partial reg stall on alternatives 3 and 4. What to do? 
(define_insn "*addqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp") (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp") @@ -5639,7 +5608,7 @@ (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, QImode, operands)" { - bool widen = (which_alternative == 3 || which_alternative == 4); + bool widen = (get_attr_mode (insn) != MODE_QI); switch (get_attr_type (insn)) { @@ -5688,7 +5657,12 @@ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) (const_string "1") (const_string "*"))) - (set_attr "mode" "QI,QI,QI,SI,SI,SI")]) + (set_attr "mode" "QI,QI,QI,SI,SI,SI") + ;; Potential partial reg stall on alternatives 3 and 4. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "3,4") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) (define_insn "*addqi_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) @@ -5737,32 +5711,6 @@ (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])]) -;; Convert add to the lea pattern to avoid flags dependency. -(define_split - [(set (match_operand:SWI 0 "register_operand") - (plus:SWI (match_operand:SWI 1 "register_operand") - (match_operand:SWI 2 "<nonmemory_operand>"))) - (clobber (reg:CC FLAGS_REG))] - "reload_completed && ix86_lea_for_add_ok (insn, operands)" - [(const_int 0)] -{ - machine_mode mode = <MODE>mode; - rtx pat; - - if (<MODE_SIZE> < GET_MODE_SIZE (SImode)) - { - mode = SImode; - operands[0] = gen_lowpart (mode, operands[0]); - operands[1] = gen_lowpart (mode, operands[1]); - operands[2] = gen_lowpart (mode, operands[2]); - } - - pat = gen_rtx_PLUS (mode, operands[1], operands[2]); - - emit_insn (gen_rtx_SET (operands[0], pat)); - DONE; -}) - ;; Split non destructive adds if we cannot use lea. (define_split [(set (match_operand:DI 0 "register_operand") @@ -5780,6 +5728,24 @@ ;; Convert add to the lea pattern to avoid flags dependency. 
(define_split + [(set (match_operand:SWI 0 "register_operand") + (plus:SWI (match_operand:SWI 1 "register_operand") + (match_operand:SWI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && ix86_lea_for_add_ok (insn, operands)" + [(set (match_dup 0) + (plus:<LEAMODE> (match_dup 1) (match_dup 2)))] +{ + if (<MODE>mode != <LEAMODE>mode) + { + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]); + operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]); + operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]); + } +}) + +;; Convert add to the lea pattern to avoid flags dependency. +(define_split [(set (match_operand:DI 0 "register_operand") (zero_extend:DI (plus:SI (match_operand:SI 1 "register_operand") @@ -6264,7 +6230,7 @@ [(set (match_operand:SWI12 0 "register_operand" "=r") (plus:SWI12 (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l") - (match_operand:SWI12 2 "const248_operand" "n")) + (match_operand 2 "const248_operand" "n")) (match_operand:SWI12 3 "nonmemory_operand" "ri")))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" "#" @@ -6286,7 +6252,7 @@ (plus:SWI12 (plus:SWI12 (mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l") - (match_operand:SWI12 2 "const248_operand" "n")) + (match_operand 2 "const248_operand" "n")) (match_operand:SWI12 3 "register_operand" "r")) (match_operand:SWI12 4 "immediate_operand" "i")))] "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" @@ -6312,8 +6278,8 @@ (any_or:SWI12 (ashift:SWI12 (match_operand:SWI12 1 "index_register_operand" "l") - (match_operand:SWI12 2 "const_0_to_3_operand" "n")) - (match_operand:SWI12 3 "const_int_operand" "n")))] + (match_operand 2 "const_0_to_3_operand" "n")) + (match_operand 3 "const_int_operand" "n")))] "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && ((unsigned HOST_WIDE_INT) INTVAL (operands[3]) < (HOST_WIDE_INT_1U << INTVAL (operands[2])))" @@ -6336,8 +6302,8 @@ (any_or:SWI48 
(ashift:SWI48 (match_operand:SWI48 1 "index_register_operand" "l") - (match_operand:SWI48 2 "const_0_to_3_operand" "n")) - (match_operand:SWI48 3 "const_int_operand" "n")))] + (match_operand 2 "const_0_to_3_operand" "n")) + (match_operand 3 "const_int_operand" "n")))] "(unsigned HOST_WIDE_INT) INTVAL (operands[3]) < (HOST_WIDE_INT_1U << INTVAL (operands[2]))" "#" @@ -7201,7 +7167,7 @@ (match_operand:DWIH 2 "nonimmediate_operand")))) (clobber (reg:CC FLAGS_REG))] "TARGET_BMI2 && reload_completed - && true_regnum (operands[1]) == DX_REG" + && REGNO (operands[1]) == DX_REG" [(parallel [(set (match_dup 3) (mult:DWIH (match_dup 1) (match_dup 2))) (set (match_dup 4) @@ -8247,7 +8213,6 @@ (const_string "*"))) (set_attr "mode" "HI,HI,SI,HI")]) -;; %%% Potential partial reg stall on alternative 2. What to do? (define_insn "*andqi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,!k") (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") @@ -8270,7 +8235,12 @@ } } [(set_attr "type" "alu,alu,alu,msklog") - (set_attr "mode" "QI,QI,SI,HI")]) + (set_attr "mode" "QI,QI,SI,HI") + ;; Potential partial reg stall on alternative 2. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "2") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) (define_insn "*andqi_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) @@ -8346,7 +8316,8 @@ (match_operand:SWI248 2 "const_int_operand"))) (clobber (reg:CC FLAGS_REG))] "reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + && (!REG_P (operands[1]) + || REGNO (operands[0]) != REGNO (operands[1]))" [(const_int 0)] { HOST_WIDE_INT ival = INTVAL (operands[2]); @@ -8754,7 +8725,6 @@ [(set_attr "type" "alu,alu,msklog") (set_attr "mode" "HI")]) -;; %%% Potential partial reg stall on alternative 2. What to do? 
(define_insn "*<code>qi_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r,!k") (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") @@ -8767,7 +8737,12 @@ <logic>{l}\t{%k2, %k0|%k0, %k2} k<logic>w\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "alu,alu,alu,msklog") - (set_attr "mode" "QI,QI,SI,HI")]) + (set_attr "mode" "QI,QI,SI,HI") + ;; Potential partial reg stall on alternative 2. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "2") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*<code>si_1_zext" @@ -9258,8 +9233,7 @@ [(match_operand:MODEF 1 "register_operand" "0,x,0,0")])) (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X")) (clobber (reg:CC FLAGS_REG))] - "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) - || TARGET_80387" + "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" "#" [(set (attr "enabled") (if_then_else @@ -9308,12 +9282,12 @@ [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))]) (define_split - [(set (match_operand 0 "register_operand") + [(set (match_operand 0 "sse_reg_operand") (match_operator 3 "absneg_operator" [(match_operand 1 "register_operand")])) (use (match_operand 2 "nonimmediate_operand")) (clobber (reg:CC FLAGS_REG))] - "reload_completed && SSE_REG_P (operands[0])" + "reload_completed" [(set (match_dup 0) (match_dup 3))] { machine_mode mode = GET_MODE (operands[0]); @@ -9332,7 +9306,7 @@ }) (define_split - [(set (match_operand:SF 0 "register_operand") + [(set (match_operand:SF 0 "general_reg_operand") (match_operator:SF 1 "absneg_operator" [(match_dup 0)])) (use (match_operand:V4SF 2)) (clobber (reg:CC FLAGS_REG))] @@ -9356,7 +9330,7 @@ }) (define_split - [(set (match_operand:DF 0 "register_operand") + [(set (match_operand:DF 0 "general_reg_operand") (match_operator:DF 1 "absneg_operator" [(match_dup 0)])) (use (match_operand 2)) (clobber (reg:CC 
FLAGS_REG))] @@ -9394,7 +9368,7 @@ }) (define_split - [(set (match_operand:XF 0 "register_operand") + [(set (match_operand:XF 0 "general_reg_operand") (match_operator:XF 1 "absneg_operator" [(match_dup 0)])) (use (match_operand 2)) (clobber (reg:CC FLAGS_REG))] @@ -9404,8 +9378,7 @@ { rtx tmp; operands[0] = gen_rtx_REG (SImode, - true_regnum (operands[0]) - + (TARGET_64BIT ? 1 : 2)); + REGNO (operands[0]) + (TARGET_64BIT ? 1 : 2)); if (GET_CODE (operands[1]) == ABS) { tmp = GEN_INT (0x7fff); @@ -9546,7 +9519,6 @@ (set_attr "prefix" "*,vex") (set_attr "mode" "HI")]) -;; %%% Potential partial reg stall on alternative 1. What to do? (define_insn "*one_cmplqi2_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!k") (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))] @@ -9569,7 +9541,12 @@ [(set_attr "isa" "*,*,avx512f") (set_attr "type" "negnot,negnot,msklog") (set_attr "prefix" "*,*,vex") - (set_attr "mode" "QI,SI,QI")]) + (set_attr "mode" "QI,SI,QI") + ;; Potential partial reg stall on alternative 1. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) ;; ??? Currently never generated - xor is used instead. (define_insn "*one_cmplsi2_1_zext" @@ -9988,7 +9965,6 @@ (const_string "*"))) (set_attr "mode" "HI,SI")]) -;; %%% Potential partial reg stall on alternative 1. What to do? (define_insn "*ashlqi3_1" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp") (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l") @@ -10044,7 +10020,12 @@ (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "QI,SI,SI")]) + (set_attr "mode" "QI,SI,SI") + ;; Potential partial reg stall on alternative 1. 
+ (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) (define_insn "*ashlqi3_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) @@ -10091,31 +10072,21 @@ ;; Convert ashift to the lea pattern to avoid flags dependency. (define_split - [(set (match_operand 0 "register_operand") - (ashift (match_operand 1 "index_register_operand") - (match_operand:QI 2 "const_int_operand"))) + [(set (match_operand:SWI 0 "register_operand") + (ashift:SWI (match_operand:SWI 1 "index_register_operand") + (match_operand 2 "const_0_to_3_operand"))) (clobber (reg:CC FLAGS_REG))] - "GET_MODE (operands[0]) == GET_MODE (operands[1]) - && reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" - [(const_int 0)] + "reload_completed + && REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_dup 0) + (mult:<LEAMODE> (match_dup 1) (match_dup 2)))] { - machine_mode mode = GET_MODE (operands[0]); - rtx pat; - - if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)) - { - mode = SImode; - operands[0] = gen_lowpart (mode, operands[0]); - operands[1] = gen_lowpart (mode, operands[1]); + if (<MODE>mode != <LEAMODE>mode) + { + operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]); + operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]); } - - operands[2] = gen_int_mode (1 << INTVAL (operands[2]), mode); - - pat = gen_rtx_MULT (mode, operands[1], operands[2]); - - emit_insn (gen_rtx_SET (operands[0], pat)); - DONE; + operands[2] = GEN_INT (1 << INTVAL (operands[2])); }) ;; Convert ashift to the lea pattern to avoid flags dependency. 
@@ -10123,15 +10094,15 @@ [(set (match_operand:DI 0 "register_operand") (zero_extend:DI (ashift:SI (match_operand:SI 1 "index_register_operand") - (match_operand:QI 2 "const_int_operand")))) + (match_operand 2 "const_0_to_3_operand")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + && REGNO (operands[0]) != REGNO (operands[1])" [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))] { operands[1] = gen_lowpart (SImode, operands[1]); - operands[2] = gen_int_mode (1 << INTVAL (operands[2]), SImode); + operands[2] = GEN_INT (1 << INTVAL (operands[2])); }) ;; This pattern can't accept a variable shift count, since shifts by @@ -11078,20 +11049,19 @@ (const_int 1)) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && !TARGET_USE_BT" - [(const_int 0)] + [(parallel [(set (match_dup 0) + (ior:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] { int i = INTVAL (operands[1]); - rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); + operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); - if (i >= 31) + if (!x86_64_immediate_operand (operands[3], DImode)) { - emit_move_insn (operands[2], op1); - op1 = operands[2]; + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; } - - emit_insn (gen_iordi3 (operands[0], operands[0], op1)); - DONE; }) (define_peephole2 @@ -11103,20 +11073,19 @@ (const_int 0)) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && !TARGET_USE_BT" - [(const_int 0)] + [(parallel [(set (match_dup 0) + (and:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] { int i = INTVAL (operands[1]); - rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); + operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode); - if (i >= 32) + if (!x86_64_immediate_operand (operands[3], DImode)) { - emit_move_insn (operands[2], op1); - op1 = operands[2]; + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; } 
- - emit_insn (gen_anddi3 (operands[0], operands[0], op1)); - DONE; }) (define_peephole2 @@ -11129,20 +11098,19 @@ (match_dup 0) (const_int 1) (match_dup 1)))) (clobber (reg:CC FLAGS_REG))])] "TARGET_64BIT && !TARGET_USE_BT" - [(const_int 0)] + [(parallel [(set (match_dup 0) + (xor:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] { int i = INTVAL (operands[1]); - rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); + operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode); - if (i >= 31) + if (!x86_64_immediate_operand (operands[3], DImode)) { - emit_move_insn (operands[2], op1); - op1 = operands[2]; + emit_move_insn (operands[2], operands[3]); + operands[3] = operands[2]; } - - emit_insn (gen_xordi3 (operands[0], operands[0], op1)); - DONE; }) (define_insn "*bt<mode>" @@ -14063,9 +14031,9 @@ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v") (match_operator:MODEF 3 "binary_fp_operator" [(match_operand:MODEF 1 - "nonimm_ssenomem_operand" "0,fm,0,v") + "x87nonimm_ssenomem_operand" "0,fm,0,v") (match_operand:MODEF 2 - "nonimmediate_operand" "fm,0,xm,vm")]))] + "nonimmediate_operand" "fm,0,xm,vm")]))] "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) || (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode))) && !COMMUTATIVE_ARITH_P (operands[3]) @@ -17239,7 +17207,7 @@ (set_attr "mode" "DF,DF,DI,DI,DI,DI")]) (define_split - [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand") + [(set (match_operand:DF 0 "general_reg_operand") (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) (match_operand:DF 2 "nonimmediate_operand") @@ -17295,7 +17263,7 @@ ;; Don't do conditional moves with memory inputs (define_peephole2 [(match_scratch:MODEF 4 "r") - (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand") + (set (match_operand:MODEF 0 "general_reg_operand") (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) (match_operand:MODEF 2 
"nonimmediate_operand") @@ -17845,7 +17813,7 @@ (match_operand:SI 3 "immediate_operand")) (const_int 0)]))] "ix86_match_ccmode (insn, CCNOmode) - && (true_regnum (operands[2]) != AX_REG + && (REGNO (operands[2]) != AX_REG || satisfies_constraint_K (operands[3])) && peep2_reg_dead_p (1, operands[2])" [(parallel @@ -17866,7 +17834,7 @@ (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[2]) != AX_REG + && REGNO (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" [(parallel [(set (match_dup 0) @@ -17887,7 +17855,7 @@ (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL && ix86_match_ccmode (insn, CCNOmode) - && true_regnum (operands[2]) != AX_REG + && REGNO (operands[2]) != AX_REG && peep2_reg_dead_p (1, operands[2])" [(parallel [(set (match_dup 0) (match_op_dup 1 |