about summary refs log tree commit diff
path: root/gcc/config/i386/sse.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/sse.md')
-rw-r--r-- gcc/config/i386/sse.md 160
1 files changed, 126 insertions, 34 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e2db3b17f05..253ff5d5a7d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -175,7 +175,7 @@
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
- (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
@@ -658,13 +658,21 @@
;; Mapping of vector modes to a vector mode of half size
(define_mode_attr ssehalfvecmode
- [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
+ [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
(V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
(V16SF "V8SF") (V8DF "V4DF")
(V8SF "V4SF") (V4DF "V2DF")
(V4SF "V2SF")])
+(define_mode_attr ssehalfvecmodelower
+ [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
+ (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
+ (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
+ (V16SF "v8sf") (V8DF "v4df")
+ (V8SF "v4sf") (V4DF "v2df")
+ (V4SF "v2sf")])
+
;; Mapping of vector modes ti packed single mode of the same size
(define_mode_attr ssePSmode
[(V16SI "V16SF") (V8DF "V16SF")
@@ -687,7 +695,18 @@
(V16SI "SI") (V8SI "SI") (V4SI "SI")
(V8DI "DI") (V4DI "DI") (V2DI "DI")
(V16SF "SF") (V8SF "SF") (V4SF "SF")
- (V8DF "DF") (V4DF "DF") (V2DF "DF")])
+ (V8DF "DF") (V4DF "DF") (V2DF "DF")
+ (V4TI "TI") (V2TI "TI")])
+
+;; Mapping of vector modes back to the lowercase scalar mode names
+(define_mode_attr ssescalarmodelower
+ [(V64QI "qi") (V32QI "qi") (V16QI "qi")
+ (V32HI "hi") (V16HI "hi") (V8HI "hi")
+ (V16SI "si") (V8SI "si") (V4SI "si")
+ (V8DI "di") (V4DI "di") (V2DI "di")
+ (V16SF "sf") (V8SF "sf") (V4SF "sf")
+ (V8DF "df") (V4DF "df") (V2DF "df")
+ (V4TI "ti") (V2TI "ti")])
;; Mapping of vector modes to the 128bit modes
(define_mode_attr ssexmmmode
@@ -2355,7 +2374,7 @@
{
rtx tmp = gen_reg_rtx (V8DFmode);
ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
- emit_insn (gen_vec_extractv8df (operands[0], tmp, const0_rtx));
+ emit_insn (gen_vec_extractv8dfdf (operands[0], tmp, const0_rtx));
DONE;
})
@@ -2370,7 +2389,7 @@
emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
- emit_insn (gen_vec_extractv4df (operands[0], vec_res, const0_rtx));
+ emit_insn (gen_vec_extractv4dfdf (operands[0], vec_res, const0_rtx));
DONE;
})
@@ -2381,7 +2400,7 @@
{
rtx tmp = gen_reg_rtx (V2DFmode);
emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
- emit_insn (gen_vec_extractv2df (operands[0], tmp, const0_rtx));
+ emit_insn (gen_vec_extractv2dfdf (operands[0], tmp, const0_rtx));
DONE;
})
@@ -2392,7 +2411,7 @@
{
rtx tmp = gen_reg_rtx (V16SFmode);
ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
- emit_insn (gen_vec_extractv16sf (operands[0], tmp, const0_rtx));
+ emit_insn (gen_vec_extractv16sfsf (operands[0], tmp, const0_rtx));
DONE;
})
@@ -2408,7 +2427,7 @@
emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
- emit_insn (gen_vec_extractv8sf (operands[0], vec_res, const0_rtx));
+ emit_insn (gen_vec_extractv8sfsf (operands[0], vec_res, const0_rtx));
DONE;
})
@@ -2426,7 +2445,7 @@
}
else
ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
- emit_insn (gen_vec_extractv4sf (operands[0], vec_res, const0_rtx));
+ emit_insn (gen_vec_extractv4sfsf (operands[0], vec_res, const0_rtx));
DONE;
})
@@ -2448,7 +2467,8 @@
{
rtx tmp = gen_reg_rtx (<MODE>mode);
ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
- emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
+ emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
+ const0_rtx));
DONE;
})
@@ -2460,7 +2480,8 @@
{
rtx tmp = gen_reg_rtx (<MODE>mode);
ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
- emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
+ emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
+ const0_rtx));
DONE;
})
@@ -2472,7 +2493,8 @@
{
rtx tmp = gen_reg_rtx (<MODE>mode);
ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
- emit_insn (gen_vec_extract<mode> (operands[0], tmp, const0_rtx));
+ emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
+ const0_rtx));
DONE;
})
@@ -2484,7 +2506,7 @@
{
rtx tmp = gen_reg_rtx (V8HImode);
ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
- emit_insn (gen_vec_extractv8hi (operands[0], tmp, const0_rtx));
+ emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
DONE;
})
@@ -6920,15 +6942,6 @@
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
-(define_expand "vec_init<mode>"
- [(match_operand:V_128 0 "register_operand")
- (match_operand 1)]
- "TARGET_SSE"
-{
- ix86_expand_vector_init (false, operands[0], operands[1]);
- DONE;
-})
-
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "vec_set<mode>_0"
@@ -7886,9 +7899,10 @@
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
-(define_expand "vec_extract<mode>"
+(define_expand "vec_extract<mode><ssescalarmodelower>"
[(match_operand:<ssescalarmode> 0 "register_operand")
(match_operand:VEC_EXTRACT_MODE 1 "register_operand")
(match_operand 2 "const_int_operand")]
@@ -7899,6 +7913,19 @@
DONE;
})
+(define_expand "vec_extract<mode><ssehalfvecmodelower>"
+ [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+ (match_operand:V_512 1 "register_operand")
+ (match_operand 2 "const_0_to_1_operand")]
+ "TARGET_AVX512F"
+{
+ if (INTVAL (operands[2]))
+ emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
+ else
+ emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point element swizzling
@@ -13734,6 +13761,50 @@
operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
})
+(define_insn "*vec_extractv2ti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
+ (vec_select:TI
+ (match_operand:V2TI 1 "register_operand" "x,v")
+ (parallel
+ [(match_operand:SI 2 "const_0_to_1_operand")])))]
+ "TARGET_AVX"
+ "@
+ vextract%~128\t{%2, %1, %0|%0, %1, %2}
+ vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex,evex")
+ (set_attr "mode" "OI")])
+
+(define_insn "*vec_extractv4ti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
+ (vec_select:TI
+ (match_operand:V4TI 1 "register_operand" "v")
+ (parallel
+ [(match_operand:SI 2 "const_0_to_3_operand")])))]
+ "TARGET_AVX512F"
+ "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "XI")])
+
+(define_mode_iterator VEXTRACTI128_MODE
+ [(V4TI "TARGET_AVX512F") V2TI])
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand")
+ (vec_select:TI
+ (match_operand:VEXTRACTI128_MODE 1 "register_operand")
+ (parallel [(const_int 0)])))]
+ "TARGET_AVX
+ && reload_completed
+ && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
+ [(set (match_dup 0) (match_dup 1))]
+ "operands[1] = gen_lowpart (TImode, operands[1]);")
+
;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
;; vector modes into vec_extract*.
(define_split
@@ -16656,7 +16727,7 @@
for (i = 0; i < <ssescalarnum>; i++)
RTVEC_ELT (vs, i) = op2;
- emit_insn (gen_vec_init<mode> (reg, par));
+ emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
DONE;
}
@@ -16688,7 +16759,7 @@
for (i = 0; i < <ssescalarnum>; i++)
RTVEC_ELT (vs, i) = op2;
- emit_insn (gen_vec_init<mode> (reg, par));
+ emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
emit_insn (gen_neg<mode>2 (neg, reg));
emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
DONE;
@@ -16982,7 +17053,7 @@
XVECEXP (par, 0, i) = operands[2];
tmp = gen_reg_rtx (V16QImode);
- emit_insn (gen_vec_initv16qi (tmp, par));
+ emit_insn (gen_vec_initv16qiqi (tmp, par));
if (negate)
emit_insn (gen_negv16qi2 (tmp, tmp));
@@ -17018,7 +17089,7 @@
for (i = 0; i < 2; i++)
XVECEXP (par, 0, i) = operands[2];
- emit_insn (gen_vec_initv2di (reg, par));
+ emit_insn (gen_vec_initv2didi (reg, par));
if (negate)
emit_insn (gen_negv2di2 (reg, reg));
@@ -18738,19 +18809,40 @@
<ssehalfvecmode>mode);
})
-(define_expand "vec_init<mode>"
- [(match_operand:V_256 0 "register_operand")
+;; Modes handled by vec_init expanders.
+(define_mode_iterator VEC_INIT_MODE
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
+
+;; Likewise, but for initialization from half sized vectors.
+;; Thus, these are all VEC_INIT_MODE modes except V2?? (V2DI, V2DF, V2TI).
+(define_mode_iterator VEC_INIT_HALF_MODE
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
+ (V4TI "TARGET_AVX512F")])
+
+(define_expand "vec_init<mode><ssescalarmodelower>"
+ [(match_operand:VEC_INIT_MODE 0 "register_operand")
(match_operand 1)]
- "TARGET_AVX"
+ "TARGET_SSE"
{
ix86_expand_vector_init (false, operands[0], operands[1]);
DONE;
})
-(define_expand "vec_init<mode>"
- [(match_operand:VF48_I1248 0 "register_operand")
+(define_expand "vec_init<mode><ssehalfvecmodelower>"
+ [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
(match_operand 1)]
- "TARGET_AVX512F"
+ "TARGET_SSE"
{
ix86_expand_vector_init (false, operands[0], operands[1]);
DONE;