about summary refs log tree commit diff
path: root/gcc/config/i386/i386.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- gcc/config/i386/i386.md 494
1 file changed, 231 insertions, 263 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index be7cfbfd64e..9b5407aa697 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1024,6 +1024,9 @@
(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")])
(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti")])
+;; LEA mode corresponding to an integer mode.
+(define_mode_attr LEAMODE [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
+
;; Half mode for double word integer modes.
(define_mode_iterator DWIH [(SI "!TARGET_64BIT")
(DI "TARGET_64BIT")])
@@ -2696,34 +2699,31 @@
(set_attr "amdfam10_decode" "double")
(set_attr "bdver1_decode" "double")])
-(define_insn "*swap<mode>_1"
- [(set (match_operand:SWI12 0 "register_operand" "+r")
- (match_operand:SWI12 1 "register_operand" "+r"))
+(define_insn "*swap<mode>"
+ [(set (match_operand:SWI12 0 "register_operand" "+<r>,r")
+ (match_operand:SWI12 1 "register_operand" "+<r>,r"))
(set (match_dup 1)
(match_dup 0))]
- "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
- "xchg{l}\t%k1, %k0"
+ ""
+ "@
+ xchg{<imodesuffix>}\t%1, %0
+ xchg{l}\t%k1, %k0"
[(set_attr "type" "imov")
- (set_attr "mode" "SI")
+ (set_attr "mode" "<MODE>,SI")
+ (set (attr "preferred_for_size")
+ (cond [(eq_attr "alternative" "0")
+ (symbol_ref "false")]
+ (symbol_ref "true")))
+ ;; Potential partial reg stall on alternative 1.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "1")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "double")
(set_attr "bdver1_decode" "double")])
-;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL
-;; is disabled for AMDFAM10
-(define_insn "*swap<mode>_2"
- [(set (match_operand:SWI12 0 "register_operand" "+<r>")
- (match_operand:SWI12 1 "register_operand" "+<r>"))
- (set (match_dup 1)
- (match_dup 0))]
- "TARGET_PARTIAL_REG_STALL"
- "xchg{<imodesuffix>}\t%1, %0"
- [(set_attr "type" "imov")
- (set_attr "mode" "<MODE>")
- (set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
-
(define_expand "movstrict<mode>"
[(set (strict_low_part (match_operand:SWI12 0 "nonimmediate_operand"))
(match_operand:SWI12 1 "general_operand"))]
@@ -3072,14 +3072,10 @@
(define_split
[(set (match_operand:SF 0 "push_operand")
(match_operand:SF 1 "memory_operand"))]
- "reload_completed"
+ "reload_completed
+ && find_constant_src (insn)"
[(set (match_dup 0) (match_dup 2))]
-{
- operands[2] = find_constant_src (curr_insn);
-
- if (operands[2] == NULL_RTX)
- FAIL;
-})
+ "operands[2] = find_constant_src (curr_insn);")
(define_split
[(set (match_operand 0 "push_operand")
@@ -3601,19 +3597,10 @@
&& (GET_MODE (operands[0]) == TFmode
|| GET_MODE (operands[0]) == XFmode
|| GET_MODE (operands[0]) == DFmode
- || GET_MODE (operands[0]) == SFmode)"
+ || GET_MODE (operands[0]) == SFmode)
+ && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
[(set (match_dup 0) (match_dup 2))]
-{
- operands[2] = find_constant_src (curr_insn);
-
- if (operands[2] == NULL_RTX
- || (SSE_REGNO_P (REGNO (operands[0]))
- && standard_sse_constant_p (operands[2],
- GET_MODE (operands[0])) != 1)
- || (STACK_REGNO_P (REGNO (operands[0]))
- && standard_80387_constant_p (operands[2]) < 1))
- FAIL;
-})
+ "operands[2] = find_constant_src (curr_insn);")
(define_split
[(set (match_operand 0 "any_fp_register_operand")
@@ -3621,19 +3608,10 @@
"reload_completed
&& (GET_MODE (operands[0]) == TFmode
|| GET_MODE (operands[0]) == XFmode
- || GET_MODE (operands[0]) == DFmode)"
+ || GET_MODE (operands[0]) == DFmode)
+ && ix86_standard_x87sse_constant_load_p (insn, operands[0])"
[(set (match_dup 0) (match_dup 2))]
-{
- operands[2] = find_constant_src (curr_insn);
-
- if (operands[2] == NULL_RTX
- || (SSE_REGNO_P (REGNO (operands[0]))
- && standard_sse_constant_p (operands[2],
- GET_MODE (operands[0])) != 1)
- || (STACK_REGNO_P (REGNO (operands[0]))
- && standard_80387_constant_p (operands[2]) < 1))
- FAIL;
-})
+ "operands[2] = find_constant_src (curr_insn);")
;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
(define_split
@@ -3777,20 +3755,18 @@
"split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
(define_split
- [(set (match_operand:DI 0 "register_operand")
- (zero_extend:DI (match_operand:SI 1 "register_operand")))]
+ [(set (match_operand:DI 0 "general_reg_operand")
+ (zero_extend:DI (match_operand:SI 1 "general_reg_operand")))]
"!TARGET_64BIT && reload_completed
- && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))
- && true_regnum (operands[0]) == true_regnum (operands[1])"
+ && REGNO (operands[0]) == REGNO (operands[1])"
[(set (match_dup 4) (const_int 0))]
"split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
(define_split
- [(set (match_operand:DI 0 "nonimmediate_operand")
- (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
+ [(set (match_operand:DI 0 "nonimmediate_gr_operand")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_gr_operand")))]
"!TARGET_64BIT && reload_completed
- && !(MEM_P (operands[0]) && MEM_P (operands[1]))
- && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))"
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
[(set (match_dup 3) (match_dup 1))
(set (match_dup 4) (const_int 0))]
"split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
@@ -3828,7 +3804,8 @@
[(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
- if (true_regnum (operands[0]) != true_regnum (operands[1]))
+ if (!REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))
{
ix86_expand_clear (operands[0]);
@@ -3875,7 +3852,8 @@
[(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
(clobber (reg:CC FLAGS_REG))])]
{
- if (true_regnum (operands[0]) != true_regnum (operands[1]))
+ if (!REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))
{
ix86_expand_clear (operands[0]);
@@ -3988,8 +3966,8 @@
/* Generate a cltd if possible and doing so is profitable. */
if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
- && true_regnum (operands[1]) == AX_REG
- && true_regnum (operands[2]) == DX_REG)
+ && REGNO (operands[1]) == AX_REG
+ && REGNO (operands[2]) == DX_REG)
{
emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31)));
}
@@ -4030,8 +4008,8 @@
(set (match_operand:SI 3 "memory_operand") (match_dup 2))]
"/* cltd is shorter than sarl $31, %eax */
!optimize_function_for_size_p (cfun)
- && true_regnum (operands[1]) == AX_REG
- && true_regnum (operands[2]) == DX_REG
+ && REGNO (operands[1]) == AX_REG
+ && REGNO (operands[2]) == DX_REG
&& peep2_reg_dead_p (2, operands[1])
&& peep2_reg_dead_p (3, operands[2])
&& !reg_mentioned_p (operands[2], operands[3])"
@@ -4052,19 +4030,19 @@
{
split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
- if (true_regnum (operands[3]) != true_regnum (operands[1]))
+ if (REGNO (operands[3]) != REGNO (operands[1]))
emit_move_insn (operands[3], operands[1]);
/* Generate a cltd if possible and doing so is profitable. */
if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
- && true_regnum (operands[3]) == AX_REG
- && true_regnum (operands[4]) == DX_REG)
+ && REGNO (operands[3]) == AX_REG
+ && REGNO (operands[4]) == DX_REG)
{
emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
DONE;
}
- if (true_regnum (operands[4]) != true_regnum (operands[1]))
+ if (REGNO (operands[4]) != REGNO (operands[1]))
emit_move_insn (operands[4], operands[1]);
emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31)));
@@ -4203,15 +4181,15 @@
"operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
(define_expand "extendsfdf2"
- [(set (match_operand:DF 0 "nonimmediate_operand")
+ [(set (match_operand:DF 0 "nonimm_ssenomem_operand")
(float_extend:DF (match_operand:SF 1 "general_operand")))]
- "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
{
/* ??? Needed for compress_float_constant since all fp constants
are TARGET_LEGITIMATE_CONSTANT_P. */
if (CONST_DOUBLE_P (operands[1]))
{
- if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
+ if ((!SSE_FLOAT_MODE_P (DFmode) || TARGET_MIX_SSE_I387)
&& standard_80387_constant_p (operands[1]) > 0)
{
operands[1] = simplify_const_unary_operation
@@ -4231,12 +4209,12 @@
that might lead to ICE on 32bit target. The sequence is unlikely to combine
anyway. */
(define_split
- [(set (match_operand:DF 0 "register_operand")
+ [(set (match_operand:DF 0 "sse_reg_operand")
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand")))]
"TARGET_USE_VECTOR_FP_CONVERTS
&& optimize_insn_for_speed_p ()
- && reload_completed && SSE_REG_P (operands[0])
+ && reload_completed
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 2)
@@ -4253,13 +4231,11 @@
{
/* If it is unsafe to overwrite upper half of source, we need
to move to destination and unpack there. */
- if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
- || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
- && true_regnum (operands[0]) != true_regnum (operands[1]))
+ if (REGNO (operands[0]) != REGNO (operands[1])
|| (EXT_REX_SSE_REG_P (operands[1])
&& !TARGET_AVX512VL))
{
- rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
+ rtx tmp = lowpart_subreg (SFmode, operands[0], DFmode);
emit_move_insn (tmp, operands[1]);
}
else
@@ -4267,7 +4243,7 @@
/* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
=v, v, then vbroadcastss will be only needed for AVX512F without
AVX512VL. */
- if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3])))
+ if (!EXT_REX_SSE_REGNO_P (REGNO (operands[3])))
emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
operands[3]));
else
@@ -4283,21 +4259,20 @@
;; It's more profitable to split and then extend in the same register.
(define_peephole2
- [(set (match_operand:DF 0 "register_operand")
+ [(set (match_operand:DF 0 "sse_reg_operand")
(float_extend:DF
(match_operand:SF 1 "memory_operand")))]
"TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
- && optimize_insn_for_speed_p ()
- && SSE_REG_P (operands[0])"
+ && optimize_insn_for_speed_p ()"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (float_extend:DF (match_dup 2)))]
- "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));")
+ "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
-(define_insn "*extendsfdf2_mixed"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,v")
+(define_insn "*extendsfdf2"
+ [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
{
switch (which_alternative)
{
@@ -4316,18 +4291,16 @@
(set_attr "prefix" "orig,orig,maybe_vex")
(set_attr "mode" "SF,XF,DF")
(set (attr "enabled")
- (cond [(eq_attr "alternative" "0,1")
- (symbol_ref "TARGET_MIX_SSE_I387")
- ]
- (symbol_ref "true")))])
-
-(define_insn "*extendsfdf2_i387"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m")
- (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
- "TARGET_80387"
- "* return output_387_reg_move (insn, operands);"
- [(set_attr "type" "fmov")
- (set_attr "mode" "SF,XF")])
+ (if_then_else
+ (match_test ("SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"))
+ (if_then_else
+ (eq_attr "alternative" "0,1")
+ (symbol_ref "TARGET_MIX_SSE_I387")
+ (symbol_ref "true"))
+ (if_then_else
+ (eq_attr "alternative" "0,1")
+ (symbol_ref "true")
+ (symbol_ref "false"))))])
(define_expand "extend<mode>xf2"
[(set (match_operand:XF 0 "nonimmediate_operand")
@@ -4370,9 +4343,9 @@
[(set (match_operand:SF 0 "nonimmediate_operand")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand")))]
- "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
{
- if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
+ if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
;
else if (flag_unsafe_math_optimizations)
;
@@ -4392,12 +4365,12 @@
that might lead to ICE on 32bit target. The sequence is unlikely to combine
anyway. */
(define_split
- [(set (match_operand:SF 0 "register_operand")
+ [(set (match_operand:SF 0 "sse_reg_operand")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand")))]
"TARGET_USE_VECTOR_FP_CONVERTS
&& optimize_insn_for_speed_p ()
- && reload_completed && SSE_REG_P (operands[0])
+ && reload_completed
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 2)
@@ -4415,9 +4388,7 @@
if (REG_P (operands[1]))
{
if (!TARGET_SSE3
- && true_regnum (operands[0]) != true_regnum (operands[1])
- && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
- || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8))
+ && REGNO (operands[0]) != REGNO (operands[1]))
{
rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
emit_move_insn (tmp, operands[1]);
@@ -4434,15 +4405,14 @@
;; It's more profitable to split and then extend in the same register.
(define_peephole2
- [(set (match_operand:SF 0 "register_operand")
+ [(set (match_operand:SF 0 "sse_reg_operand")
(float_truncate:SF
(match_operand:DF 1 "memory_operand")))]
"TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
- && optimize_insn_for_speed_p ()
- && SSE_REG_P (operands[0])"
+ && optimize_insn_for_speed_p ()"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (float_truncate:SF (match_dup 2)))]
- "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
+ "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
(define_expand "truncdfsf2_with_temp"
[(parallel [(set (match_operand:SF 0)
@@ -4455,7 +4425,7 @@
[(set (match_operand:SF 0 "nonimmediate_operand" "=fm,v")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand" "f ,vm")))]
- "TARGET_SSE2 && TARGET_SSE_MATH"
+ "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
{
switch (which_alternative)
{
@@ -4549,7 +4519,7 @@
"reload_completed"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]
- "operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));")
+ "operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));")
;; Conversion from XFmode to {SF,DF}mode
@@ -5155,11 +5125,11 @@
;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs
;; alternative in sse2_loadld.
(define_split
- [(set (match_operand:MODEF 0 "register_operand")
+ [(set (match_operand:MODEF 0 "sse_reg_operand")
(float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
- "TARGET_SSE2 && TARGET_SSE_MATH
- && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
- && reload_completed && SSE_REG_P (operands[0])
+ "TARGET_USE_VECTOR_CONVERTS
+ && optimize_function_for_speed_p (cfun)
+ && reload_completed
&& (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC)
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
@@ -5178,83 +5148,83 @@
DONE;
})
-;; Avoid partial SSE register dependency stalls
+;; Avoid partial SSE register dependency stalls. This splitter should split
+;; late in the pass sequence (after register rename pass), so allocated
+;; registers won't change anymore.
+
(define_split
- [(set (match_operand:MODEF 0 "register_operand")
+ [(set (match_operand:MODEF 0 "sse_reg_operand")
(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
- "TARGET_SSE2 && TARGET_SSE_MATH
- && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun)
- && reload_completed && SSE_REG_P (operands[0])
+ && epilogue_completed
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
- [(const_int 0)]
+ [(set (match_dup 0)
+ (vec_merge:<MODEF:ssevecmode>
+ (vec_duplicate:<MODEF:ssevecmode>
+ (float:MODEF
+ (match_dup 1)))
+ (match_dup 0)
+ (const_int 1)))]
{
const machine_mode vmode = <MODEF:ssevecmode>mode;
- const machine_mode mode = <MODEF:MODE>mode;
- rtx t, op0 = lowpart_subreg (vmode, operands[0], mode);
-
- emit_move_insn (op0, CONST0_RTX (vmode));
- t = gen_rtx_FLOAT (mode, operands[1]);
- t = gen_rtx_VEC_DUPLICATE (vmode, t);
- t = gen_rtx_VEC_MERGE (vmode, t, op0, const1_rtx);
- emit_insn (gen_rtx_SET (op0, t));
- DONE;
+ operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
+ emit_move_insn (operands[0], CONST0_RTX (vmode));
})
-;; Break partial reg stall for cvtsd2ss.
+;; Break partial reg stall for cvtsd2ss. This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
-(define_peephole2
- [(set (match_operand:SF 0 "register_operand")
+(define_split
+ [(set (match_operand:SF 0 "sse_reg_operand")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand")))]
- "TARGET_SSE2 && TARGET_SSE_MATH
- && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun)
- && SSE_REG_P (operands[0])
- && (!SSE_REG_P (operands[1])
+ && epilogue_completed
+ && (!REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1]))
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
(vec_merge:V4SF
(vec_duplicate:V4SF
- (float_truncate:V2SF
+ (float_truncate:SF
(match_dup 1)))
(match_dup 0)
(const_int 1)))]
{
operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
- operands[1] = lowpart_subreg (V2DFmode, operands[1], DFmode);
emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
})
-;; Break partial reg stall for cvtss2sd.
+;; Break partial reg stall for cvtss2sd. This splitter should split
+;; late in the pass sequence (after register rename pass),
+;; so allocated registers won't change anymore.
-(define_peephole2
- [(set (match_operand:DF 0 "register_operand")
+(define_split
+ [(set (match_operand:DF 0 "sse_reg_operand")
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand")))]
- "TARGET_SSE2 && TARGET_SSE_MATH
- && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+ "TARGET_SSE_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun)
- && SSE_REG_P (operands[0])
- && (!SSE_REG_P (operands[1])
+ && epilogue_completed
+ && (!REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1]))
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
(vec_merge:V2DF
- (float_extend:V2DF
- (vec_select:V2SF
- (match_dup 1)
- (parallel [(const_int 0) (const_int 1)])))
- (match_dup 0)
+ (vec_duplicate:V2DF
+ (float_extend:DF
+ (match_dup 1)))
+ (match_dup 0)
(const_int 1)))]
{
operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
- operands[1] = lowpart_subreg (V4SFmode, operands[1], SFmode);
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
})
@@ -5299,7 +5269,7 @@
emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
operands[4]));
- operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+ operands[3] = gen_lowpart (DImode, operands[3]);
})
(define_split
@@ -5631,7 +5601,6 @@
(const_string "*")))
(set_attr "mode" "HI,HI,HI,SI")])
-;; %%% Potential partial reg stall on alternatives 3 and 4. What to do?
(define_insn "*addqi_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp")
(plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp")
@@ -5639,7 +5608,7 @@
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (PLUS, QImode, operands)"
{
- bool widen = (which_alternative == 3 || which_alternative == 4);
+ bool widen = (get_attr_mode (insn) != MODE_QI);
switch (get_attr_type (insn))
{
@@ -5688,7 +5657,12 @@
(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
(const_string "1")
(const_string "*")))
- (set_attr "mode" "QI,QI,QI,SI,SI,SI")])
+ (set_attr "mode" "QI,QI,QI,SI,SI,SI")
+ ;; Potential partial reg stall on alternatives 3 and 4.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "3,4")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
(define_insn "*addqi_1_slp"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
@@ -5737,32 +5711,6 @@
(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])])
-;; Convert add to the lea pattern to avoid flags dependency.
-(define_split
- [(set (match_operand:SWI 0 "register_operand")
- (plus:SWI (match_operand:SWI 1 "register_operand")
- (match_operand:SWI 2 "<nonmemory_operand>")))
- (clobber (reg:CC FLAGS_REG))]
- "reload_completed && ix86_lea_for_add_ok (insn, operands)"
- [(const_int 0)]
-{
- machine_mode mode = <MODE>mode;
- rtx pat;
-
- if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
- {
- mode = SImode;
- operands[0] = gen_lowpart (mode, operands[0]);
- operands[1] = gen_lowpart (mode, operands[1]);
- operands[2] = gen_lowpart (mode, operands[2]);
- }
-
- pat = gen_rtx_PLUS (mode, operands[1], operands[2]);
-
- emit_insn (gen_rtx_SET (operands[0], pat));
- DONE;
-})
-
;; Split non destructive adds if we cannot use lea.
(define_split
[(set (match_operand:DI 0 "register_operand")
@@ -5780,6 +5728,24 @@
;; Convert add to the lea pattern to avoid flags dependency.
(define_split
+ [(set (match_operand:SWI 0 "register_operand")
+ (plus:SWI (match_operand:SWI 1 "register_operand")
+ (match_operand:SWI 2 "<nonmemory_operand>")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && ix86_lea_for_add_ok (insn, operands)"
+ [(set (match_dup 0)
+ (plus:<LEAMODE> (match_dup 1) (match_dup 2)))]
+{
+ if (<MODE>mode != <LEAMODE>mode)
+ {
+ operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+ operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
+ operands[2] = gen_lowpart (<LEAMODE>mode, operands[2]);
+ }
+})
+
+;; Convert add to the lea pattern to avoid flags dependency.
+(define_split
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
(plus:SI (match_operand:SI 1 "register_operand")
@@ -6264,7 +6230,7 @@
[(set (match_operand:SWI12 0 "register_operand" "=r")
(plus:SWI12
(mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
- (match_operand:SWI12 2 "const248_operand" "n"))
+ (match_operand 2 "const248_operand" "n"))
(match_operand:SWI12 3 "nonmemory_operand" "ri")))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
@@ -6286,7 +6252,7 @@
(plus:SWI12
(plus:SWI12
(mult:SWI12 (match_operand:SWI12 1 "index_register_operand" "l")
- (match_operand:SWI12 2 "const248_operand" "n"))
+ (match_operand 2 "const248_operand" "n"))
(match_operand:SWI12 3 "register_operand" "r"))
(match_operand:SWI12 4 "immediate_operand" "i")))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
@@ -6312,8 +6278,8 @@
(any_or:SWI12
(ashift:SWI12
(match_operand:SWI12 1 "index_register_operand" "l")
- (match_operand:SWI12 2 "const_0_to_3_operand" "n"))
- (match_operand:SWI12 3 "const_int_operand" "n")))]
+ (match_operand 2 "const_0_to_3_operand" "n"))
+ (match_operand 3 "const_int_operand" "n")))]
"(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
&& ((unsigned HOST_WIDE_INT) INTVAL (operands[3])
< (HOST_WIDE_INT_1U << INTVAL (operands[2])))"
@@ -6336,8 +6302,8 @@
(any_or:SWI48
(ashift:SWI48
(match_operand:SWI48 1 "index_register_operand" "l")
- (match_operand:SWI48 2 "const_0_to_3_operand" "n"))
- (match_operand:SWI48 3 "const_int_operand" "n")))]
+ (match_operand 2 "const_0_to_3_operand" "n"))
+ (match_operand 3 "const_int_operand" "n")))]
"(unsigned HOST_WIDE_INT) INTVAL (operands[3])
< (HOST_WIDE_INT_1U << INTVAL (operands[2]))"
"#"
@@ -7201,7 +7167,7 @@
(match_operand:DWIH 2 "nonimmediate_operand"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_BMI2 && reload_completed
- && true_regnum (operands[1]) == DX_REG"
+ && REGNO (operands[1]) == DX_REG"
[(parallel [(set (match_dup 3)
(mult:DWIH (match_dup 1) (match_dup 2)))
(set (match_dup 4)
@@ -8247,7 +8213,6 @@
(const_string "*")))
(set_attr "mode" "HI,HI,SI,HI")])
-;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*andqi_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,!k")
(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
@@ -8270,7 +8235,12 @@
}
}
[(set_attr "type" "alu,alu,alu,msklog")
- (set_attr "mode" "QI,QI,SI,HI")])
+ (set_attr "mode" "QI,QI,SI,HI")
+ ;; Potential partial reg stall on alternative 2.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "2")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
(define_insn "*andqi_1_slp"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
@@ -8346,7 +8316,8 @@
(match_operand:SWI248 2 "const_int_operand")))
(clobber (reg:CC FLAGS_REG))]
"reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
+ && (!REG_P (operands[1])
+ || REGNO (operands[0]) != REGNO (operands[1]))"
[(const_int 0)]
{
HOST_WIDE_INT ival = INTVAL (operands[2]);
@@ -8754,7 +8725,6 @@
[(set_attr "type" "alu,alu,msklog")
(set_attr "mode" "HI")])
-;; %%% Potential partial reg stall on alternative 2. What to do?
(define_insn "*<code>qi_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r,!k")
(any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
@@ -8767,7 +8737,12 @@
<logic>{l}\t{%k2, %k0|%k0, %k2}
k<logic>w\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "alu,alu,alu,msklog")
- (set_attr "mode" "QI,QI,SI,HI")])
+ (set_attr "mode" "QI,QI,SI,HI")
+ ;; Potential partial reg stall on alternative 2.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "2")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
;; See comment for addsi_1_zext why we do use nonimmediate_operand
(define_insn "*<code>si_1_zext"
@@ -9258,8 +9233,7 @@
[(match_operand:MODEF 1 "register_operand" "0,x,0,0")]))
(use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X"))
(clobber (reg:CC FLAGS_REG))]
- "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || TARGET_80387"
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"#"
[(set (attr "enabled")
(if_then_else
@@ -9308,12 +9282,12 @@
[(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))])
(define_split
- [(set (match_operand 0 "register_operand")
+ [(set (match_operand 0 "sse_reg_operand")
(match_operator 3 "absneg_operator"
[(match_operand 1 "register_operand")]))
(use (match_operand 2 "nonimmediate_operand"))
(clobber (reg:CC FLAGS_REG))]
- "reload_completed && SSE_REG_P (operands[0])"
+ "reload_completed"
[(set (match_dup 0) (match_dup 3))]
{
machine_mode mode = GET_MODE (operands[0]);
@@ -9332,7 +9306,7 @@
})
(define_split
- [(set (match_operand:SF 0 "register_operand")
+ [(set (match_operand:SF 0 "general_reg_operand")
(match_operator:SF 1 "absneg_operator" [(match_dup 0)]))
(use (match_operand:V4SF 2))
(clobber (reg:CC FLAGS_REG))]
@@ -9356,7 +9330,7 @@
})
(define_split
- [(set (match_operand:DF 0 "register_operand")
+ [(set (match_operand:DF 0 "general_reg_operand")
(match_operator:DF 1 "absneg_operator" [(match_dup 0)]))
(use (match_operand 2))
(clobber (reg:CC FLAGS_REG))]
@@ -9394,7 +9368,7 @@
})
(define_split
- [(set (match_operand:XF 0 "register_operand")
+ [(set (match_operand:XF 0 "general_reg_operand")
(match_operator:XF 1 "absneg_operator" [(match_dup 0)]))
(use (match_operand 2))
(clobber (reg:CC FLAGS_REG))]
@@ -9404,8 +9378,7 @@
{
rtx tmp;
operands[0] = gen_rtx_REG (SImode,
- true_regnum (operands[0])
- + (TARGET_64BIT ? 1 : 2));
+ REGNO (operands[0]) + (TARGET_64BIT ? 1 : 2));
if (GET_CODE (operands[1]) == ABS)
{
tmp = GEN_INT (0x7fff);
@@ -9546,7 +9519,6 @@
(set_attr "prefix" "*,vex")
(set_attr "mode" "HI")])
-;; %%% Potential partial reg stall on alternative 1. What to do?
(define_insn "*one_cmplqi2_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!k")
(not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
@@ -9569,7 +9541,12 @@
[(set_attr "isa" "*,*,avx512f")
(set_attr "type" "negnot,negnot,msklog")
(set_attr "prefix" "*,*,vex")
- (set_attr "mode" "QI,SI,QI")])
+ (set_attr "mode" "QI,SI,QI")
+ ;; Potential partial reg stall on alternative 1.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "1")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
;; ??? Currently never generated - xor is used instead.
(define_insn "*one_cmplsi2_1_zext"
@@ -9988,7 +9965,6 @@
(const_string "*")))
(set_attr "mode" "HI,SI")])
-;; %%% Potential partial reg stall on alternative 1. What to do?
(define_insn "*ashlqi3_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp")
(ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
@@ -10044,7 +10020,12 @@
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
- (set_attr "mode" "QI,SI,SI")])
+ (set_attr "mode" "QI,SI,SI")
+ ;; Potential partial reg stall on alternative 1.
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "1")
+ (symbol_ref "!TARGET_PARTIAL_REG_STALL")]
+ (symbol_ref "true")))])
(define_insn "*ashlqi3_1_slp"
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
@@ -10091,31 +10072,21 @@
;; Convert ashift to the lea pattern to avoid flags dependency.
(define_split
- [(set (match_operand 0 "register_operand")
- (ashift (match_operand 1 "index_register_operand")
- (match_operand:QI 2 "const_int_operand")))
+ [(set (match_operand:SWI 0 "register_operand")
+ (ashift:SWI (match_operand:SWI 1 "index_register_operand")
+ (match_operand 2 "const_0_to_3_operand")))
(clobber (reg:CC FLAGS_REG))]
- "GET_MODE (operands[0]) == GET_MODE (operands[1])
- && reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
- [(const_int 0)]
+ "reload_completed
+ && REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_dup 0)
+ (mult:<LEAMODE> (match_dup 1) (match_dup 2)))]
{
- machine_mode mode = GET_MODE (operands[0]);
- rtx pat;
-
- if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
- {
- mode = SImode;
- operands[0] = gen_lowpart (mode, operands[0]);
- operands[1] = gen_lowpart (mode, operands[1]);
+ if (<MODE>mode != <LEAMODE>mode)
+ {
+ operands[0] = gen_lowpart (<LEAMODE>mode, operands[0]);
+ operands[1] = gen_lowpart (<LEAMODE>mode, operands[1]);
}
-
- operands[2] = gen_int_mode (1 << INTVAL (operands[2]), mode);
-
- pat = gen_rtx_MULT (mode, operands[1], operands[2]);
-
- emit_insn (gen_rtx_SET (operands[0], pat));
- DONE;
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));
})
;; Convert ashift to the lea pattern to avoid flags dependency.
@@ -10123,15 +10094,15 @@
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
(ashift:SI (match_operand:SI 1 "index_register_operand")
- (match_operand:QI 2 "const_int_operand"))))
+ (match_operand 2 "const_0_to_3_operand"))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && reload_completed
- && true_regnum (operands[0]) != true_regnum (operands[1])"
+ && REGNO (operands[0]) != REGNO (operands[1])"
[(set (match_dup 0)
(zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))]
{
operands[1] = gen_lowpart (SImode, operands[1]);
- operands[2] = gen_int_mode (1 << INTVAL (operands[2]), SImode);
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));
})
;; This pattern can't accept a variable shift count, since shifts by
@@ -11078,20 +11049,19 @@
(const_int 1))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_64BIT && !TARGET_USE_BT"
- [(const_int 0)]
+ [(parallel [(set (match_dup 0)
+ (ior:DI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
{
int i = INTVAL (operands[1]);
- rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
+ operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
- if (i >= 31)
+ if (!x86_64_immediate_operand (operands[3], DImode))
{
- emit_move_insn (operands[2], op1);
- op1 = operands[2];
+ emit_move_insn (operands[2], operands[3]);
+ operands[3] = operands[2];
}
-
- emit_insn (gen_iordi3 (operands[0], operands[0], op1));
- DONE;
})
(define_peephole2
@@ -11103,20 +11073,19 @@
(const_int 0))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_64BIT && !TARGET_USE_BT"
- [(const_int 0)]
+ [(parallel [(set (match_dup 0)
+ (and:DI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
{
int i = INTVAL (operands[1]);
- rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
+ operands[3] = gen_int_mode (~(HOST_WIDE_INT_1U << i), DImode);
- if (i >= 32)
+ if (!x86_64_immediate_operand (operands[3], DImode))
{
- emit_move_insn (operands[2], op1);
- op1 = operands[2];
+ emit_move_insn (operands[2], operands[3]);
+ operands[3] = operands[2];
}
-
- emit_insn (gen_anddi3 (operands[0], operands[0], op1));
- DONE;
})
(define_peephole2
@@ -11129,20 +11098,19 @@
(match_dup 0) (const_int 1) (match_dup 1))))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_64BIT && !TARGET_USE_BT"
- [(const_int 0)]
+ [(parallel [(set (match_dup 0)
+ (xor:DI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
{
int i = INTVAL (operands[1]);
- rtx op1 = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
+ operands[3] = gen_int_mode (HOST_WIDE_INT_1U << i, DImode);
- if (i >= 31)
+ if (!x86_64_immediate_operand (operands[3], DImode))
{
- emit_move_insn (operands[2], op1);
- op1 = operands[2];
+ emit_move_insn (operands[2], operands[3]);
+ operands[3] = operands[2];
}
-
- emit_insn (gen_xordi3 (operands[0], operands[0], op1));
- DONE;
})
(define_insn "*bt<mode>"
@@ -14063,9 +14031,9 @@
[(set (match_operand:MODEF 0 "register_operand" "=f,f,x,v")
(match_operator:MODEF 3 "binary_fp_operator"
[(match_operand:MODEF 1
- "nonimm_ssenomem_operand" "0,fm,0,v")
+ "x87nonimm_ssenomem_operand" "0,fm,0,v")
(match_operand:MODEF 2
- "nonimmediate_operand" "fm,0,xm,vm")]))]
+ "nonimmediate_operand" "fm,0,xm,vm")]))]
"((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
|| (TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)))
&& !COMMUTATIVE_ARITH_P (operands[3])
@@ -17239,7 +17207,7 @@
(set_attr "mode" "DF,DF,DI,DI,DI,DI")])
(define_split
- [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand")
+ [(set (match_operand:DF 0 "general_reg_operand")
(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
(match_operand:DF 2 "nonimmediate_operand")
@@ -17295,7 +17263,7 @@
;; Don't do conditional moves with memory inputs
(define_peephole2
[(match_scratch:MODEF 4 "r")
- (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand")
+ (set (match_operand:MODEF 0 "general_reg_operand")
(if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator"
[(reg FLAGS_REG) (const_int 0)])
(match_operand:MODEF 2 "nonimmediate_operand")
@@ -17845,7 +17813,7 @@
(match_operand:SI 3 "immediate_operand"))
(const_int 0)]))]
"ix86_match_ccmode (insn, CCNOmode)
- && (true_regnum (operands[2]) != AX_REG
+ && (REGNO (operands[2]) != AX_REG
|| satisfies_constraint_K (operands[3]))
&& peep2_reg_dead_p (1, operands[2])"
[(parallel
@@ -17866,7 +17834,7 @@
(const_int 0)]))]
"! TARGET_PARTIAL_REG_STALL
&& ix86_match_ccmode (insn, CCNOmode)
- && true_regnum (operands[2]) != AX_REG
+ && REGNO (operands[2]) != AX_REG
&& peep2_reg_dead_p (1, operands[2])"
[(parallel
[(set (match_dup 0)
@@ -17887,7 +17855,7 @@
(const_int 0)]))]
"! TARGET_PARTIAL_REG_STALL
&& ix86_match_ccmode (insn, CCNOmode)
- && true_regnum (operands[2]) != AX_REG
+ && REGNO (operands[2]) != AX_REG
&& peep2_reg_dead_p (1, operands[2])"
[(parallel [(set (match_dup 0)
(match_op_dup 1