aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386/sse.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386/sse.md')
-rw-r--r--gcc/config/i386/sse.md1081
1 files changed, 656 insertions, 425 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 42d553cfdaa..745b6b665f5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -778,6 +778,12 @@
(V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
(V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
+;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
+;; i32x4, f32x4, i64x2 or f64x2 suffixes.
+(define_mode_attr i128vldq
+ [(V8SF "f32x4") (V4DF "f64x2")
+ (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
+
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
(define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
@@ -1076,10 +1082,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "sse2_movq128"
- [(set (match_operand:V2DI 0 "register_operand" "=x")
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
(vec_concat:V2DI
(vec_select:DI
- (match_operand:V2DI 1 "nonimmediate_operand" "xm")
+ (match_operand:V2DI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0)]))
(const_int 0)))]
"TARGET_SSE2"
@@ -1485,7 +1491,7 @@
(match_operand:VF_128 2 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512F"
- "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
+ "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -1583,7 +1589,7 @@
(match_operand:VF_128 2 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512F"
- "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
+ "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -2783,54 +2789,61 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "<sse>_andnot<mode>3<mask_name>"
- [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
+ [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
(and:VF_128_256
(not:VF_128_256
- (match_operand:VF_128_256 1 "register_operand" "0,v"))
- (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))]
+ (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
+ (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
"TARGET_SSE && <mask_avx512vl_condition>"
{
static char buf[128];
const char *ops;
const char *suffix;
- switch (get_attr_mode (insn))
- {
- case MODE_V8SF:
- case MODE_V4SF:
- suffix = "ps";
- break;
- default:
- suffix = "<ssemodesuffix>";
- }
-
switch (which_alternative)
{
case 0:
ops = "andn%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
+ case 2:
+ case 3:
ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
}
- /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
- if (<mask_applied> && !TARGET_AVX512DQ)
+ switch (get_attr_mode (insn))
{
+ case MODE_V8SF:
+ case MODE_V4SF:
+ suffix = "ps";
+ break;
+ case MODE_OI:
+ case MODE_TI:
+ /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
+ break;
+ default:
+ suffix = "<ssemodesuffix>";
}
snprintf (buf, sizeof (buf), ops, suffix);
return buf;
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
(set_attr "type" "sselog")
- (set_attr "prefix" "orig,maybe_evex")
+ (set_attr "prefix" "orig,maybe_evex,evex,evex") ; alt 1 may still be masked: keep maybe_evex as before and as in the any_logic sibling
(set (attr "mode")
- (cond [(and (match_test "<MODE_SIZE> == 16")
+ (cond [(and (match_test "<mask_applied>")
+ (and (eq_attr "alternative" "1")
+ (match_test "!TARGET_AVX512DQ")))
+ (const_string "<sseintvecmode2>")
+ (eq_attr "alternative" "3")
+ (const_string "<sseintvecmode2>")
+ (and (match_test "<MODE_SIZE> == 16")
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "<ssePSmode>")
(match_test "TARGET_AVX")
@@ -2870,7 +2883,10 @@
}
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX512DQ")
+ (const_string "<sseinsnmode>")
+ (const_string "XI")))])
(define_expand "<code><mode>3<mask_name>"
[(set (match_operand:VF_128_256 0 "register_operand")
@@ -2889,10 +2905,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<code><mode>3<mask_name>"
- [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
+ [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
(any_logic:VF_128_256
- (match_operand:VF_128_256 1 "vector_operand" "%0,v")
- (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))]
+ (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
+ (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
"TARGET_SSE && <mask_avx512vl_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
@@ -2900,43 +2916,50 @@
const char *ops;
const char *suffix;
- switch (get_attr_mode (insn))
- {
- case MODE_V8SF:
- case MODE_V4SF:
- suffix = "ps";
- break;
- default:
- suffix = "<ssemodesuffix>";
- }
-
switch (which_alternative)
{
case 0:
ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
+ case 2:
+ case 3:
ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
}
- /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
- if (<mask_applied> && !TARGET_AVX512DQ)
+ switch (get_attr_mode (insn))
{
+ case MODE_V8SF:
+ case MODE_V4SF:
+ suffix = "ps";
+ break;
+ case MODE_OI:
+ case MODE_TI:
+ /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
+ break;
+ default:
+ suffix = "<ssemodesuffix>";
}
snprintf (buf, sizeof (buf), ops, suffix);
return buf;
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
(set_attr "type" "sselog")
- (set_attr "prefix" "orig,maybe_evex")
+ (set_attr "prefix" "orig,maybe_evex,evex,evex")
(set (attr "mode")
- (cond [(and (match_test "<MODE_SIZE> == 16")
+ (cond [(and (match_test "<mask_applied>")
+ (and (eq_attr "alternative" "1")
+ (match_test "!TARGET_AVX512DQ")))
+ (const_string "<sseintvecmode2>")
+ (eq_attr "alternative" "3")
+ (const_string "<sseintvecmode2>")
+ (and (match_test "<MODE_SIZE> == 16")
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "<ssePSmode>")
(match_test "TARGET_AVX")
@@ -2961,7 +2984,7 @@
ops = "";
/* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
- if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
+ if (!TARGET_AVX512DQ)
{
suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
ops = "p";
@@ -2974,7 +2997,10 @@
}
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX512DQ")
+ (const_string "<sseinsnmode>")
+ (const_string "XI")))])
(define_expand "copysign<mode>3"
[(set (match_dup 4)
@@ -3000,14 +3026,14 @@
;; because the native instructions read the full 128-bits.
(define_insn "*andnot<mode>3"
- [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
(and:MODEF
(not:MODEF
- (match_operand:MODEF 1 "register_operand" "0,x"))
- (match_operand:MODEF 2 "register_operand" "x,x")))]
+ (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
+ (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
"SSE_FLOAT_MODE_P (<MODE>mode)"
{
- static char buf[32];
+ static char buf[128];
const char *ops;
const char *suffix
= (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
@@ -3020,6 +3046,24 @@
case 1:
ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
break;
+ case 2:
+ if (TARGET_AVX512DQ)
+ ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ else
+ {
+ suffix = <MODE>mode == DFmode ? "q" : "d";
+ ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ }
+ break;
+ case 3:
+ if (TARGET_AVX512DQ)
+ ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+ else
+ {
+ suffix = <MODE>mode == DFmode ? "q" : "d";
+ ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+ }
+ break;
default:
gcc_unreachable ();
}
@@ -3027,11 +3071,19 @@
snprintf (buf, sizeof (buf), ops, suffix);
return buf;
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,vex,evex,evex")
(set (attr "mode")
- (cond [(and (match_test "<MODE_SIZE> == 16")
+ (cond [(eq_attr "alternative" "2")
+ (if_then_else (match_test "TARGET_AVX512DQ")
+ (const_string "<ssevecmode>")
+ (const_string "TI"))
+ (eq_attr "alternative" "3")
+ (if_then_else (match_test "TARGET_AVX512DQ")
+ (const_string "<avx512fvecmode>")
+ (const_string "XI"))
+ (and (match_test "<MODE_SIZE> == 16")
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "V4SF")
(match_test "TARGET_AVX")
@@ -3042,16 +3094,17 @@
(const_string "<ssevecmode>")))])
(define_insn "*andnottf3"
- [(set (match_operand:TF 0 "register_operand" "=x,x")
+ [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
(and:TF
- (not:TF (match_operand:TF 1 "register_operand" "0,x"))
- (match_operand:TF 2 "vector_operand" "xBm,xm")))]
+ (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
+ (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
"TARGET_SSE"
{
- static char buf[32];
+ static char buf[128];
const char *ops;
const char *tmp
- = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
+ = (which_alternative >= 2 ? "pandnq"
+ : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
switch (which_alternative)
{
@@ -3059,8 +3112,12 @@
ops = "%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
+ case 2:
ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
break;
+ case 3:
+ ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+ break;
default:
gcc_unreachable ();
}
@@ -3068,7 +3125,7 @@
snprintf (buf, sizeof (buf), ops, tmp);
return buf;
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
(if_then_else
@@ -3076,9 +3133,13 @@
(eq_attr "mode" "TI"))
(const_string "1")
(const_string "*")))
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,vex,evex,evex")
(set (attr "mode")
- (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "TI")
+ (eq_attr "alternative" "3")
+ (const_string "XI")
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
@@ -3089,13 +3150,13 @@
(const_string "TI")))])
(define_insn "*<code><mode>3"
- [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
(any_logic:MODEF
- (match_operand:MODEF 1 "register_operand" "%0,x")
- (match_operand:MODEF 2 "register_operand" "x,x")))]
+ (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
+ (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
"SSE_FLOAT_MODE_P (<MODE>mode)"
{
- static char buf[32];
+ static char buf[128];
const char *ops;
const char *suffix
= (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
@@ -3105,9 +3166,26 @@
case 0:
ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
break;
+ case 2:
+ if (!TARGET_AVX512DQ)
+ {
+ suffix = <MODE>mode == DFmode ? "q" : "d";
+ ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ break;
+ }
+ /* FALLTHRU */
case 1:
ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
break;
+ case 3:
+ if (TARGET_AVX512DQ)
+ ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+ else
+ {
+ suffix = <MODE>mode == DFmode ? "q" : "d";
+ ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+ }
+ break;
default:
gcc_unreachable ();
}
@@ -3115,11 +3193,19 @@
snprintf (buf, sizeof (buf), ops, suffix);
return buf;
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,vex,evex,evex")
(set (attr "mode")
- (cond [(and (match_test "<MODE_SIZE> == 16")
+ (cond [(eq_attr "alternative" "2")
+ (if_then_else (match_test "TARGET_AVX512DQ")
+ (const_string "<ssevecmode>")
+ (const_string "TI"))
+ (eq_attr "alternative" "3")
+ (if_then_else (match_test "TARGET_AVX512DQ")
+ (const_string "<avx512fvecmode>")
+ (const_string "XI"))
+ (and (match_test "<MODE_SIZE> == 16")
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(const_string "V4SF")
(match_test "TARGET_AVX")
@@ -3138,17 +3224,18 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
(define_insn "*<code>tf3"
- [(set (match_operand:TF 0 "register_operand" "=x,x")
+ [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
(any_logic:TF
- (match_operand:TF 1 "vector_operand" "%0,x")
- (match_operand:TF 2 "vector_operand" "xBm,xm")))]
+ (match_operand:TF 1 "vector_operand" "%0,x,v,v")
+ (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
"TARGET_SSE
&& ix86_binary_operator_ok (<CODE>, TFmode, operands)"
{
- static char buf[32];
+ static char buf[128];
const char *ops;
const char *tmp
- = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
+ = (which_alternative >= 2 ? "p<logic>q"
+ : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
switch (which_alternative)
{
@@ -3156,8 +3243,12 @@
ops = "%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
+ case 2:
ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
break;
+ case 3:
+ ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
+ break;
default:
gcc_unreachable ();
}
@@ -3165,7 +3256,7 @@
snprintf (buf, sizeof (buf), ops, tmp);
return buf;
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
(if_then_else
@@ -3173,9 +3264,13 @@
(eq_attr "mode" "TI"))
(const_string "1")
(const_string "*")))
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,vex,evex,evex")
(set (attr "mode")
- (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "TI")
+ (eq_attr "alternative" "3")
+ (const_string "XI") ; alt 3 prints %g (512-bit) operands; same mode as alt 3 of *andnottf3
+ (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
@@ -3327,10 +3422,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
- [(set (match_operand:VF_AVX512VL 0 "register_operand" "=x")
+ [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
(vec_merge:VF_AVX512VL
(fma:VF_AVX512VL
- (match_operand:VF_AVX512VL 1 "register_operand" "x")
+ (match_operand:VF_AVX512VL 1 "register_operand" "v")
(match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
(match_operand:VF_AVX512VL 3 "register_operand" "0"))
(match_dup 3)
@@ -4187,7 +4282,7 @@
(parallel [(const_int 0) (const_int 1)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_AVX512DQ && TARGET_AVX512VL"
- "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
@@ -4210,7 +4305,7 @@
(parallel [(const_int 0) (const_int 1)]))]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512DQ && TARGET_AVX512VL"
- "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
@@ -4735,9 +4830,9 @@
"operands[2] = CONST0_RTX (V4SImode);")
(define_insn "*avx_cvtpd2dq256_2"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+ [(set (match_operand:V8SI 0 "register_operand" "=v")
(vec_concat:V8SI
- (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
+ (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
UNSPEC_FIX_NOTRUNC)
(match_operand:V4SI 2 "const0_operand")))]
"TARGET_AVX"
@@ -4885,7 +4980,7 @@
(match_operand:V4SF 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_AVX512DQ && TARGET_AVX512VL"
- "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
@@ -4949,6 +5044,27 @@
(set_attr "prefix" "orig,orig,<round_prefix>")
(set_attr "mode" "SF")])
+(define_insn "*sse2_vd_cvtsd2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
+ (match_operand:V4SF 1 "register_operand" "0,0,v")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "@
+ cvtsd2ss\t{%2, %0|%0, %2}
+ cvtsd2ss\t{%2, %0|%0, %2}
+ vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,double,*")
+ (set_attr "amdfam10_decode" "vector,double,*")
+ (set_attr "bdver1_decode" "direct,direct,*")
+ (set_attr "btver2_decode" "double,double,double")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "mode" "SF")])
+
(define_insn "sse2_cvtss2sd<round_saeonly_name>"
[(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
(vec_merge:V2DF
@@ -4972,6 +5088,27 @@
(set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
(set_attr "mode" "DF")])
+(define_insn "*sse2_vd_cvtss2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
+ (match_operand:V2DF 1 "register_operand" "0,0,v")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "@
+ cvtss2sd\t{%2, %0|%0, %2}
+ cvtss2sd\t{%2, %0|%0, %2}
+ vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssecvt")
+ (set_attr "amdfam10_decode" "vector,double,*")
+ (set_attr "athlon_decode" "direct,direct,*")
+ (set_attr "bdver1_decode" "direct,direct,*")
+ (set_attr "btver2_decode" "double,double,double")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "mode" "DF")])
+
(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
[(set (match_operand:V8SF 0 "register_operand" "=v")
(float_truncate:V8SF
@@ -5050,10 +5187,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "*avx_cvtps2pd256_2"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
+ [(set (match_operand:V4DF 0 "register_operand" "=v")
(float_extend:V4DF
(vec_select:V4SF
- (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SF 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
"TARGET_AVX"
@@ -5744,11 +5881,11 @@
})
(define_insn "sse_movhlps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
(parallel [(const_int 6)
(const_int 7)
(const_int 2)
@@ -5762,7 +5899,7 @@
%vmovhps\t{%2, %0|%q0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
(define_expand "sse_movlhps_exp"
@@ -5789,11 +5926,11 @@
})
(define_insn "sse_movlhps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,m,v")) ; alt 3 stays the memory (V2SF-mode) form; only x becomes v
(parallel [(const_int 0)
(const_int 1)
(const_int 4)
@@ -5807,7 +5944,7 @@
%vmovlps\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
@@ -5851,8 +5988,8 @@
[(set (match_dup 3)
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SF 1 "register_operand")
+ (match_operand:V8SF 2 "nonimmediate_operand"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 4) (const_int 12)
@@ -5956,8 +6093,8 @@
[(set (match_dup 3)
(vec_select:V8SF
(vec_concat:V16SF
- (match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V8SF 1 "register_operand")
+ (match_operand:V8SF 2 "nonimmediate_operand"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 4) (const_int 12)
@@ -5987,11 +6124,11 @@
})
(define_insn "vec_interleave_lowv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ [(set (match_operand:V4SF 0 "register_operand" "=x,v")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "register_operand" "0,x")
- (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
+ (match_operand:V4SF 1 "register_operand" "0,v")
+ (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"TARGET_SSE"
@@ -6000,7 +6137,7 @@
vunpcklps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sselog")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "V4SF")])
;; These are modeled with the same vec_concat as the others so that we
@@ -6219,11 +6356,11 @@
(set_attr "mode" "V4SF")])
(define_insn "sse_shufps_<mode>"
- [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
+ [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
(vec_select:VI4F_128
(vec_concat:<ssedoublevecmode>
- (match_operand:VI4F_128 1 "register_operand" "0,x")
- (match_operand:VI4F_128 2 "vector_operand" "xBm,xm"))
+ (match_operand:VI4F_128 1 "register_operand" "0,v")
+ (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
(parallel [(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
(match_operand 5 "const_4_to_7_operand")
@@ -6250,13 +6387,13 @@
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseshuf")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "V4SF")])
(define_insn "sse_storehps"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
(vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+ (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_SSE"
"@
@@ -6288,12 +6425,12 @@
})
(define_insn "sse_loadhps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
(vec_concat:V4SF
(vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
(parallel [(const_int 0) (const_int 1)]))
- (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
+ (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
"TARGET_SSE"
"@
movhps\t{%2, %0|%0, %q2}
@@ -6303,13 +6440,13 @@
%vmovlps\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
(define_insn "sse_storelps"
- [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
(vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
(parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"@
@@ -6341,11 +6478,11 @@
})
(define_insn "sse_loadlps"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
(vec_concat:V4SF
- (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
+ (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
(vec_select:V2SF
- (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
+ (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
(parallel [(const_int 2) (const_int 3)]))))]
"TARGET_SSE"
"@
@@ -6357,14 +6494,14 @@
[(set_attr "isa" "noavx,avx,noavx,avx,*")
(set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
(set_attr "length_immediate" "1,1,*,*,*")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
(define_insn "sse_movss"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ [(set (match_operand:V4SF 0 "register_operand" "=x,v")
(vec_merge:V4SF
- (match_operand:V4SF 2 "register_operand" " x,x")
- (match_operand:V4SF 1 "register_operand" " 0,x")
+ (match_operand:V4SF 2 "register_operand" " x,v")
+ (match_operand:V4SF 1 "register_operand" " 0,v")
(const_int 1)))]
"TARGET_SSE"
"@
@@ -6372,31 +6509,31 @@
vmovss\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "SF")])
(define_insn "avx2_vec_dup<mode>"
- [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
+ [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
(vec_duplicate:VF1_128_256
(vec_select:SF
- (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "v")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"vbroadcastss\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "<MODE>")])
(define_insn "avx2_vec_dupv8sf_1"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
+ [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_duplicate:V8SF
(vec_select:SF
- (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 1 "register_operand" "v")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"vbroadcastss\t{%x1, %0|%0, %x1}"
[(set_attr "type" "sselog1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V8SF")])
(define_insn "avx512f_vec_dup<mode>_1"
@@ -6415,12 +6552,12 @@
;; unpcklps with register source since it is shorter.
(define_insn "*vec_concatv2sf_sse4_1"
[(set (match_operand:V2SF 0 "register_operand"
- "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
+ "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
(vec_concat:V2SF
(match_operand:SF 1 "nonimmediate_operand"
- " 0, 0,x, 0,0, x,m, 0 , m")
+ " 0, 0,Yv, 0,0, v,m, 0 , m")
(match_operand:SF 2 "vector_move_operand"
- " Yr,*x,x, m,m, m,C,*ym, C")))]
+ " Yr,*x,Yv, m,m, m,C,*ym, C")))]
"TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
unpcklps\t{%2, %0|%0, %2}
@@ -6437,7 +6574,7 @@
(set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
(set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
(set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
- (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
+ (set_attr "prefix" "orig,orig,maybe_evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
(set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
;; ??? In theory we can match memory for the MMX alternative, but allowing
@@ -6458,10 +6595,10 @@
(set_attr "mode" "V4SF,SF,DI,DI")])
(define_insn "*vec_concatv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
+ [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
(vec_concat:V4SF
- (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
- (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
+ (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
"TARGET_SSE"
"@
movlhps\t{%2, %0|%0, %2}
@@ -6470,7 +6607,7 @@
vmovhps\t{%2, %1, %0|%0, %1, %q2}"
[(set_attr "isa" "noavx,avx,noavx,avx")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,vex,orig,vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
(define_expand "vec_init<mode>"
@@ -6526,11 +6663,11 @@
;; A subset is vec_setv4sf.
(define_insn "*vec_setv4sf_sse4_1"
- [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
+ [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
(vec_merge:V4SF
(vec_duplicate:V4SF
- (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
- (match_operand:V4SF 1 "register_operand" "0,0,x")
+ (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
+ (match_operand:V4SF 1 "register_operand" "0,0,v")
(match_operand:SI 3 "const_int_operand")))]
"TARGET_SSE4_1
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
@@ -6553,13 +6690,13 @@
(set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "V4SF")])
(define_insn "sse4_1_insertps"
- [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
- (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
- (match_operand:V4SF 1 "register_operand" "0,0,x")
+ [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
+ (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
+ (match_operand:V4SF 1 "register_operand" "0,0,v")
(match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
UNSPEC_INSERTPS))]
"TARGET_SSE4_1"
@@ -6587,7 +6724,7 @@
(set_attr "prefix_data16" "1,1,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "V4SF")])
(define_split
@@ -6613,9 +6750,9 @@
})
(define_insn_and_split "*vec_extractv4sf_0"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
(parallel [(const_int 0)])))]
"TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"#"
@@ -6624,9 +6761,9 @@
"operands[1] = gen_lowpart (SFmode, operands[1]);")
(define_insn_and_split "*sse4_1_extractps"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,v,v")
(vec_select:SF
- (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
+ (match_operand:V4SF 1 "register_operand" "Yr,*v,0,v")
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
"TARGET_SSE4_1"
"@
@@ -6665,7 +6802,7 @@
(set_attr "mode" "V4SF,V4SF,*,*")])
(define_insn_and_split "*vec_extractv4sf_mem"
- [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
+ [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
(vec_select:SF
(match_operand:V4SF 1 "memory_operand" "o,o,o")
(parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
@@ -7239,9 +7376,9 @@
(set_attr "mode" "XI")])
(define_insn_and_split "vec_extract_lo_v16hi"
- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
(vec_select:V8HI
- (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
+ (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
@@ -7253,20 +7390,27 @@
"operands[1] = gen_lowpart (V8HImode, operands[1]);")
(define_insn "vec_extract_hi_v16hi"
- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
(vec_select:V8HI
- (match_operand:V16HI 1 "register_operand" "x,x")
+ (match_operand:V16HI 1 "register_operand" "x,x,v,v,v,v")
(parallel [(const_int 8) (const_int 9)
(const_int 10) (const_int 11)
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)])))]
"TARGET_AVX"
- "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ "@
+ vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
+ vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "memory" "none,store")
- (set_attr "prefix" "vex")
+ (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
+ (set_attr "memory" "none,store,none,store,none,store")
+ (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
(set_attr "mode" "OI")])
(define_insn_and_split "vec_extract_lo_v64qi"
@@ -7325,9 +7469,9 @@
(set_attr "mode" "XI")])
(define_insn_and_split "vec_extract_lo_v32qi"
- [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
(vec_select:V16QI
- (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
+ (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
@@ -7343,9 +7487,9 @@
"operands[1] = gen_lowpart (V16QImode, operands[1]);")
(define_insn "vec_extract_hi_v32qi"
- [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
(vec_select:V16QI
- (match_operand:V32QI 1 "register_operand" "x,x")
+ (match_operand:V32QI 1 "register_operand" "x,x,v,v,v,v")
(parallel [(const_int 16) (const_int 17)
(const_int 18) (const_int 19)
(const_int 20) (const_int 21)
@@ -7355,12 +7499,19 @@
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
"TARGET_AVX"
- "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ "@
+ vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
+ vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
+ vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "memory" "none,store")
- (set_attr "prefix" "vex")
+ (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
+ (set_attr "memory" "none,store,none,store,none,store")
+ (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
(set_attr "mode" "OI")])
;; Modes handled by vec_extract patterns.
@@ -7424,8 +7575,8 @@
[(set (match_dup 3)
(vec_select:V4DF
(vec_concat:V8DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DF 1 "register_operand")
+ (match_operand:V4DF 2 "nonimmediate_operand"))
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))
(set (match_dup 4)
@@ -7480,11 +7631,11 @@
})
(define_insn "*vec_interleave_highv2df"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
(vec_select:V2DF
(vec_concat:V4DF
- (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
- (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
(parallel [(const_int 1)
(const_int 3)])))]
"TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
@@ -7498,7 +7649,7 @@
[(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
(set_attr "prefix_data16" "*,*,*,1,*,1")
- (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
(define_expand "avx512f_movddup512<mask_name>"
@@ -7584,8 +7735,8 @@
[(set (match_dup 3)
(vec_select:V4DF
(vec_concat:V8DF
- (match_operand:V4DF 1 "register_operand" "x")
- (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4DF 1 "register_operand")
+ (match_operand:V4DF 2 "nonimmediate_operand"))
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))
(set (match_dup 4)
@@ -7639,11 +7790,11 @@
})
(define_insn "*vec_interleave_lowv2df"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
(vec_select:V2DF
(vec_concat:V4DF
- (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
- (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
(parallel [(const_int 0)
(const_int 2)])))]
"TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
@@ -7657,7 +7808,7 @@
[(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
(set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
(set_attr "prefix_data16" "*,*,*,1,*,1")
- (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
(define_split
@@ -8248,11 +8399,11 @@
(set_attr "mode" "TI")])
(define_insn "sse2_shufpd_<mode>"
- [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
+ [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
(vec_select:VI8F_128
(vec_concat:<ssedoublevecmode>
- (match_operand:VI8F_128 1 "register_operand" "0,x")
- (match_operand:VI8F_128 2 "vector_operand" "xBm,xm"))
+ (match_operand:VI8F_128 1 "register_operand" "0,v")
+ (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
(parallel [(match_operand 3 "const_0_to_1_operand")
(match_operand 4 "const_2_to_3_operand")])))]
"TARGET_SSE2"
@@ -8275,15 +8426,15 @@
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseshuf")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,maybe_evex")
(set_attr "mode" "V2DF")])
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "sse2_storehpd"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
+ (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
(parallel [(const_int 1)])))]
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
@@ -8301,7 +8452,7 @@
(not (match_test "TARGET_AVX")))
(const_string "1")
(const_string "*")))
- (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
+ (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
(set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
(define_split
@@ -8332,7 +8483,7 @@
(define_insn "sse2_storelpd"
[(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
+ (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
(parallel [(const_int 0)])))]
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
@@ -8393,14 +8544,14 @@
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "sse2_loadhpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand"
- "=x,x,x,x,o,o ,o")
+ "=x,v,x,v,o,o ,o")
(vec_concat:V2DF
(vec_select:DF
(match_operand:V2DF 1 "nonimmediate_operand"
- " 0,x,0,x,0,0 ,0")
+ " 0,v,0,v,0,0 ,0")
(parallel [(const_int 0)]))
(match_operand:DF 2 "nonimmediate_operand"
- " m,m,x,x,x,*f,r")))]
+ " m,m,x,v,x,*f,r")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
movhpd\t{%2, %0|%0, %2}
@@ -8413,7 +8564,7 @@
[(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
(set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
(set_attr "prefix_data16" "1,*,*,*,*,*,*")
- (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
(set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
(define_split
@@ -8449,13 +8600,13 @@
;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "sse2_loadlpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand"
- "=x,x,x,x,x,x,x,x,m,m ,m")
+ "=v,x,v,x,v,x,x,v,m,m ,m")
(vec_concat:V2DF
(match_operand:DF 2 "nonimmediate_operand"
- "xm,m,m,x,x,0,0,x,x,*f,r")
+ "vm,m,m,x,v,0,0,v,x,*f,r")
(vec_select:DF
(match_operand:V2DF 1 "vector_move_operand"
- " C,0,x,0,x,x,o,o,0,0 ,0")
+ " C,0,v,0,v,x,o,o,0,0 ,0")
(parallel [(const_int 1)]))))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
@@ -8482,7 +8633,7 @@
(const_string "ssemov")))
(set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
(set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
- (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
+ (set_attr "prefix" "maybe_vex,orig,maybe_evex,orig,maybe_evex,orig,orig,maybe_evex,*,*,*")
(set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
(define_split
@@ -8495,10 +8646,10 @@
"operands[0] = adjust_address (operands[0], DFmode, 0);")
(define_insn "sse2_movsd"
- [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
(vec_merge:V2DF
- (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
- (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
(const_int 1)))]
"TARGET_SSE2"
"@
@@ -8524,7 +8675,7 @@
(const_string "1")
(const_string "*")))
(set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
+ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex,orig,orig,maybe_evex,maybe_vex")
(set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
(define_insn "vec_dupv2df<mask_name>"
@@ -8799,7 +8950,7 @@
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)]))))]
"TARGET_AVX512VL"
- "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
(set_attr "prefix" "evex")
@@ -8889,7 +9040,11 @@
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)]))))]
"TARGET_AVX512VL"
- "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+{
+ if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
+ return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}";
+ return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
+}
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
(set_attr "prefix" "evex")
@@ -8980,7 +9135,11 @@
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)]))))]
"TARGET_AVX512VL"
- "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+{
+ if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
+ return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
+ return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+}
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
(set_attr "prefix" "evex")
@@ -9074,7 +9233,11 @@
(parallel [(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
"TARGET_AVX512VL"
- "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+{
+ if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
+ return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
+ return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
+}
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
(set_attr "prefix" "evex")
@@ -9149,7 +9312,7 @@
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
"TARGET_AVX512VL"
- "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
(set_attr "prefix" "evex")
@@ -9228,7 +9391,7 @@
(match_dup 0)
(parallel [(const_int 2) (const_int 3)]))))]
"TARGET_AVX512VL"
- "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
(set_attr "prefix" "evex")
@@ -9331,7 +9494,7 @@
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)]))))]
"TARGET_AVX512F"
- "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
(set_attr "prefix" "evex")
@@ -9803,19 +9966,19 @@
"ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
(define_insn "*avx2_pmaddwd"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
+ [(set (match_operand:V8SI 0 "register_operand" "=x,v")
(plus:V8SI
(mult:V8SI
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 1 "nonimmediate_operand" "%x")
+ (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))
(sign_extend:V8SI
(vec_select:V8HI
- (match_operand:V16HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -9836,7 +9999,8 @@
"TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
"vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseiadd")
- (set_attr "prefix" "vex")
+ (set_attr "isa" "*,avx512bw")
+ (set_attr "prefix" "vex,evex")
(set_attr "mode" "OI")])
(define_expand "sse2_pmaddwd"
@@ -9866,17 +10030,17 @@
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
(define_insn "*sse2_pmaddwd"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
(plus:V4SI
(mult:V4SI
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 1 "vector_operand" "%0,x")
+ (match_operand:V8HI 1 "vector_operand" "%0,x,v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4SI
(vec_select:V4HI
- (match_operand:V8HI 2 "vector_operand" "xBm,xm")
+ (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)]))))
(mult:V4SI
@@ -9891,12 +10055,13 @@
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"@
pmaddwd\t{%2, %0|%0, %2}
+ vpmaddwd\t{%2, %1, %0|%0, %1, %2}
vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "TI")])
(define_insn "avx512dq_mul<mode>3<mask_name>"
@@ -10072,6 +10237,20 @@
DONE;
})
+(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
+ [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
+ (ashiftrt:VI24_AVX512BW_1
+ (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
+ (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+ "TARGET_AVX512VL"
+ "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "ashr<mode>3"
[(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
(ashiftrt:VI24_AVX2
@@ -10091,20 +10270,6 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
- [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
- (ashiftrt:VI24_AVX512BW_1
- (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
- (match_operand:SI 2 "nonmemory_operand" "v,N")))]
- "TARGET_AVX512VL"
- "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "type" "sseishft")
- (set (attr "length_immediate")
- (if_then_else (match_operand 2 "const_int_operand")
- (const_string "1")
- (const_string "0")))
- (set_attr "mode" "<sseinsnmode>")])
-
(define_insn "<mask_codefor>ashrv2di3<mask_name>"
[(set (match_operand:V2DI 0 "register_operand" "=v,v")
(ashiftrt:V2DI
@@ -10442,19 +10607,20 @@
(set_attr "mode" "TI")])
(define_insn "*<code>v8hi3"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
(smaxmin:V8HI
- (match_operand:V8HI 1 "vector_operand" "%0,x")
- (match_operand:V8HI 2 "vector_operand" "xBm,xm")))]
+ (match_operand:V8HI 1 "vector_operand" "%0,x,v")
+ (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
"@
p<maxmin_int>w\t{%2, %0|%0, %2}
+ vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseiadd")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix_extra" "*,1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_extra" "*,1,1")
+ (set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "TI")])
(define_expand "<code><mode>3"
@@ -10526,19 +10692,20 @@
(set_attr "mode" "TI")])
(define_insn "*<code>v16qi3"
- [(set (match_operand:V16QI 0 "register_operand" "=x,x")
+ [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
(umaxmin:V16QI
- (match_operand:V16QI 1 "vector_operand" "%0,x")
- (match_operand:V16QI 2 "vector_operand" "xBm,xm")))]
+ (match_operand:V16QI 1 "vector_operand" "%0,x,v")
+ (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
"@
p<maxmin_int>b\t{%2, %0|%0, %2}
+ vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseiadd")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix_extra" "*,1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix_extra" "*,1,1")
+ (set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -11351,54 +11518,57 @@
})
(define_insn "<sse2_avx2>_packsswb<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
(vec_concat:VI1_AVX512
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packsswb\t{%2, %0|%0, %2}
+ vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,maybe_evex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix" "orig,<mask_prefix>,evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_packssdw<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
(vec_concat:VI2_AVX2
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
(ss_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packssdw\t{%2, %0|%0, %2}
+ vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix" "orig,<mask_prefix>,evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse2_avx2>_packuswb<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
(vec_concat:VI1_AVX512
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packuswb\t{%2, %0|%0, %2}
+ vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sselog")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix_data16" "1,*,*")
+ (set_attr "prefix" "orig,<mask_prefix>,evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx512bw_interleave_highv64qi<mask_name>"
@@ -11839,9 +12009,9 @@
(set_attr "mode" "TI")])
(define_expand "vec_interleave_high<mode>"
- [(match_operand:VI_256 0 "register_operand" "=x")
- (match_operand:VI_256 1 "register_operand" "x")
- (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
+ [(match_operand:VI_256 0 "register_operand")
+ (match_operand:VI_256 1 "register_operand")
+ (match_operand:VI_256 2 "nonimmediate_operand")]
"TARGET_AVX2"
{
rtx t1 = gen_reg_rtx (<MODE>mode);
@@ -11857,9 +12027,9 @@
})
(define_expand "vec_interleave_low<mode>"
- [(match_operand:VI_256 0 "register_operand" "=x")
- (match_operand:VI_256 1 "register_operand" "x")
- (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
+ [(match_operand:VI_256 0 "register_operand")
+ (match_operand:VI_256 1 "register_operand")
+ (match_operand:VI_256 2 "nonimmediate_operand")]
"TARGET_AVX2"
{
rtx t1 = gen_reg_rtx (<MODE>mode);
@@ -11884,13 +12054,17 @@
[(V16QI "sse4_1") (V8HI "sse2")
(V4SI "sse4_1") (V2DI "sse4_1")])
+(define_mode_attr pinsr_evex_isa
+ [(V16QI "avx512bw") (V8HI "avx512bw")
+ (V4SI "avx512dq") (V2DI "avx512dq")])
+
;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
- [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
+ [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
(vec_merge:PINSR_MODE
(vec_duplicate:PINSR_MODE
- (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
- (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
+ (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
+ (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
(match_operand:SI 3 "const_int_operand")))]
"TARGET_SSE2
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
@@ -11907,16 +12081,18 @@
case 1:
return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
case 2:
+ case 4:
if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
/* FALLTHRU */
case 3:
+ case 5:
return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,noavx,avx,avx")
+ [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
(set_attr "type" "sselog")
(set (attr "prefix_rex")
(if_then_else
@@ -11937,7 +12113,7 @@
(const_string "*")
(const_string "1")))
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,orig,vex,vex")
+ (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
(set_attr "mode" "TI")])
(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
@@ -12071,7 +12247,7 @@
(const_int 2) (const_int 3)]))
(match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
- "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
+ "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -12855,11 +13031,11 @@
"operands[2] = CONST0_RTX (V4SImode);")
(define_insn "sse2_loadld"
- [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=v,Yi,x,x,v")
(vec_merge:V4SI
(vec_duplicate:V4SI
- (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
- (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
+ (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
+ (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
(const_int 1)))]
"TARGET_SSE"
"@
@@ -12870,7 +13046,7 @@
vmovss\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
(set_attr "type" "ssemov")
- (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
+ (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
(set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; QI and HI modes handled by pextr patterns.
@@ -12878,39 +13054,44 @@
[(V16QI "TARGET_SSE4_1") V8HI])
(define_insn "*vec_extract<mode>"
- [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m")
+ [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
(vec_select:<ssescalarmode>
- (match_operand:PEXTR_MODE12 1 "register_operand" "x,x")
+ (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
(parallel
[(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
"TARGET_SSE2"
"@
%vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
- %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "*,sse4")
+ %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
+ vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set (attr "prefix_extra")
(if_then_else
- (and (eq_attr "alternative" "0")
+ (and (eq_attr "alternative" "0,2")
(eq (const_string "<MODE>mode") (const_string "V8HImode")))
(const_string "*")
(const_string "1")))
(set_attr "length_immediate" "1")
- (set_attr "prefix" "maybe_vex")
+ (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
(set_attr "mode" "TI")])
(define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
- [(set (match_operand:SWI48 0 "register_operand" "=r")
+ [(set (match_operand:SWI48 0 "register_operand" "=r,r")
(zero_extend:SWI48
(vec_select:<PEXTR_MODE12:ssescalarmode>
- (match_operand:PEXTR_MODE12 1 "register_operand" "x")
+ (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
(parallel
[(match_operand:SI 2
"const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
"TARGET_SSE2"
- "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
- [(set_attr "type" "sselog1")
+ "@
+ %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
+ vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
+ [(set_attr "isa" "*,avx512bw")
+ (set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set (attr "prefix_extra")
(if_then_else
@@ -12931,9 +13112,9 @@
"#")
(define_insn "*vec_extract<ssevecmodelower>_0"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,v ,m")
(vec_select:SWI48
- (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,v,vm,v")
(parallel [(const_int 0)])))]
"TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"#"
@@ -12943,7 +13124,7 @@
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
(vec_select:SI
- (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 1 "register_operand" "v")
(parallel [(const_int 0)]))))]
"TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
"#"
@@ -12952,9 +13133,9 @@
"operands[1] = gen_lowpart (SImode, operands[1]);")
(define_insn "*vec_extractv2di_0_sse"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=v,m")
(vec_select:DI
- (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
+ (match_operand:V2DI 1 "nonimmediate_operand" "vm,v")
(parallel [(const_int 0)])))]
"TARGET_SSE && !TARGET_64BIT
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
@@ -12970,46 +13151,49 @@
"operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
(define_insn "*vec_extractv4si"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
(vec_select:SI
- (match_operand:V4SI 1 "register_operand" "x,0,0,x")
+ (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
(parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
"TARGET_SSE4_1"
{
switch (which_alternative)
{
case 0:
+ case 1:
return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
- case 1:
case 2:
- operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
+ case 3:
+ operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
return "psrldq\t{%2, %0|%0, %2}";
- case 3:
- operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
+ case 4:
+ case 5:
+ operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "*,noavx,noavx,avx")
- (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
- (set_attr "prefix_extra" "1,*,*,*")
+ [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
+ (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
+ (set_attr "prefix_extra" "1,1,*,*,*,*")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "maybe_vex,orig,orig,vex")
+ (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
(set_attr "mode" "TI")])
(define_insn "*vec_extractv4si_zext"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI
(vec_select:SI
- (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 1 "register_operand" "x,v")
(parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
"TARGET_64BIT && TARGET_SSE4_1"
"%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
- [(set_attr "type" "sselog1")
+ [(set_attr "isa" "*,avx512dq")
+ (set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex")
@@ -13038,26 +13222,28 @@
})
(define_insn "*vec_extractv2di_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
(vec_select:DI
- (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
+ (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
(parallel [(const_int 1)])))]
"TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
%vpextrq\t{$1, %1, %0|%0, %1, 1}
+ vpextrq\t{$1, %1, %0|%0, %1, 1}
%vmovhps\t{%1, %0|%0, %1}
psrldq\t{$8, %0|%0, 8}
vpsrldq\t{$8, %1, %0|%0, %1, 8}
+ vpsrldq\t{$8, %1, %0|%0, %1, 8}
movhlps\t{%1, %0|%0, %1}
#
#"
- [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
- (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
- (set_attr "length_immediate" "1,*,1,1,*,*,*")
- (set_attr "prefix_rex" "1,*,*,*,*,*,*")
- (set_attr "prefix_extra" "1,*,*,*,*,*,*")
- (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
- (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
+ [(set_attr "isa" "x64_sse4,x64_avx512dq,*,sse2_noavx,avx,avx512bw,noavx,*,x64")
+ (set_attr "type" "sselog1,sselog1,ssemov,sseishft1,sseishft1,sseishft1,ssemov,ssemov,imov")
+ (set_attr "length_immediate" "1,1,*,1,1,1,*,*,*")
+ (set_attr "prefix_rex" "1,1,*,*,*,*,*,*,*")
+ (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
+ (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
+ (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
(define_split
[(set (match_operand:<ssescalarmode> 0 "register_operand")
@@ -13765,12 +13951,12 @@
(set_attr "mode" "DI")])
(define_insn "avx2_pmaddubsw256"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+ [(set (match_operand:V16HI 0 "register_operand" "=x,v")
(ss_plus:V16HI
(mult:V16HI
(zero_extend:V16HI
(vec_select:V16QI
- (match_operand:V32QI 1 "register_operand" "x")
+ (match_operand:V32QI 1 "register_operand" "x,v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -13781,7 +13967,7 @@
(const_int 28) (const_int 30)])))
(sign_extend:V16HI
(vec_select:V16QI
- (match_operand:V32QI 2 "nonimmediate_operand" "xm")
+ (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -13813,9 +13999,10 @@
(const_int 29) (const_int 31)]))))))]
"TARGET_AVX2"
"vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sseiadd")
+ [(set_attr "isa" "*,avx512bw")
+ (set_attr "type" "sseiadd")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "vex,evex")
(set_attr "mode" "OI")])
;; The correct representation for this is absolutely enormous, and
@@ -13868,19 +14055,19 @@
(set_attr "mode" "XI")])
(define_insn "ssse3_pmaddubsw128"
- [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
(ss_plus:V8HI
(mult:V8HI
(zero_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 1 "register_operand" "0,x")
+ (match_operand:V16QI 1 "register_operand" "0,x,v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))
(sign_extend:V8HI
(vec_select:V8QI
- (match_operand:V16QI 2 "vector_operand" "xBm,xm")
+ (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
@@ -13901,13 +14088,14 @@
"TARGET_SSSE3"
"@
pmaddubsw\t{%2, %0|%0, %2}
+ vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,*,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "TI")])
(define_insn "ssse3_pmaddubsw"
@@ -13988,16 +14176,16 @@
})
(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
(truncate:VI2_AVX2
(lshiftrt:<ssedoublemode>
(plus:<ssedoublemode>
(lshiftrt:<ssedoublemode>
(mult:<ssedoublemode>
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
+ (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
(sign_extend:<ssedoublemode>
- (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
+ (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
(const_int 14))
(match_operand:VI2_AVX2 3 "const1_operand"))
(const_int 1))))]
@@ -14005,12 +14193,13 @@
&& ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
"@
pmulhrsw\t{%2, %0|%0, %2}
+ vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseimul")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,*,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,maybe_evex")
+ (set_attr "prefix" "orig,maybe_evex,evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*ssse3_pmulhrswv4hi3"
@@ -14035,21 +14224,22 @@
(set_attr "mode" "DI")])
(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
- [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
+ [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
(unspec:VI1_AVX512
- [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
- (match_operand:VI1_AVX512 2 "vector_operand" "xBm,vm")]
+ [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
+ (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
UNSPEC_PSHUFB))]
"TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
pshufb\t{%2, %0|%0, %2}
+ vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sselog1")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,*,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,maybe_evex")
- (set_attr "btver2_decode" "vector,vector")
+ (set_attr "prefix" "orig,maybe_evex,evex")
+ (set_attr "btver2_decode" "vector")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "ssse3_pshufbv8qi3"
@@ -14117,11 +14307,11 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<ssse3_avx2>_palignr<mode>"
- [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
+ [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
(unspec:SSESCALARMODE
- [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
- (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,vm")
- (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
+ [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
+ (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
UNSPEC_PALIGNR))]
"TARGET_SSSE3"
{
@@ -14132,18 +14322,19 @@
case 0:
return "palignr\t{%3, %2, %0|%0, %2, %3}";
case 1:
+ case 2:
return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
default:
gcc_unreachable ();
}
}
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,avx,avx512bw")
(set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
- (set_attr "prefix_data16" "1,*")
+ (set_attr "prefix_data16" "1,*,*")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "ssse3_palignrdi"
@@ -14423,21 +14614,22 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
- [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
+ [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
(vec_concat:VI2_AVX2
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
+ (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
(us_truncate:<ssehalfvecmode>
- (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,vm"))))]
+ (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
"TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
packusdw\t{%2, %0|%0, %2}
packusdw\t{%2, %0|%0, %2}
+ vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
- [(set_attr "isa" "noavx,noavx,avx")
+ [(set_attr "isa" "noavx,noavx,avx,avx512bw")
(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,orig,maybe_evex")
+ (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "<sse4_1_avx2>_pblendvb"
@@ -16551,30 +16743,40 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "OI")])
+(define_mode_attr pbroadcast_evex_isa
+ [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
+ (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
+ (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
+ (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
+
(define_insn "avx2_pbroadcast<mode>"
- [(set (match_operand:VI 0 "register_operand" "=x")
+ [(set (match_operand:VI 0 "register_operand" "=x,v")
(vec_duplicate:VI
(vec_select:<ssescalarmode>
- (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
+ (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
- [(set_attr "type" "ssemov")
+ [(set_attr "isa" "*,<pbroadcast_evex_isa>")
+ (set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "vex,evex")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx2_pbroadcast<mode>_1"
- [(set (match_operand:VI_256 0 "register_operand" "=x,x")
+ [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
(vec_duplicate:VI_256
(vec_select:<ssescalarmode>
- (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
+ (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"@
vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
+ vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
+ vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
- [(set_attr "type" "ssemov")
+ [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
+ (set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
@@ -16684,15 +16886,15 @@
(set_attr "mode" "OI")])
(define_insn "avx2_vec_dupv4df"
- [(set (match_operand:V4DF 0 "register_operand" "=x")
+ [(set (match_operand:V4DF 0 "register_operand" "=v")
(vec_duplicate:V4DF
(vec_select:DF
- (match_operand:V2DF 1 "register_operand" "x")
+ (match_operand:V2DF 1 "register_operand" "v")
(parallel [(const_int 0)]))))]
"TARGET_AVX2"
"vbroadcastsd\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4DF")])
(define_insn "<avx512>_vec_dup<mode>_1"
@@ -16795,9 +16997,9 @@
(const_int 1)))])
(define_insn "vec_dupv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
+ [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
(vec_duplicate:V4SF
- (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
+ (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
"TARGET_SSE"
"@
vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
@@ -16807,13 +17009,13 @@
(set_attr "type" "sseshuf1,ssemov,sseshuf1")
(set_attr "length_immediate" "1,0,1")
(set_attr "prefix_extra" "0,1,*")
- (set_attr "prefix" "vex,vex,orig")
+ (set_attr "prefix" "maybe_evex,maybe_evex,orig")
(set_attr "mode" "V4SF")])
(define_insn "*vec_dupv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
(vec_duplicate:V4SI
- (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
+ (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
"TARGET_SSE"
"@
%vpshufd\t{$0, %1, %0|%0, %1, 0}
@@ -16823,13 +17025,13 @@
(set_attr "type" "sselog1,ssemov,sselog1")
(set_attr "length_immediate" "1,0,1")
(set_attr "prefix_extra" "0,1,*")
- (set_attr "prefix" "maybe_vex,vex,orig")
+ (set_attr "prefix" "maybe_vex,maybe_evex,orig")
(set_attr "mode" "TI,V4SF,V4SF")])
(define_insn "*vec_dupv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
+ [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
(vec_duplicate:V2DI
- (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
+ (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,m,0")))]
"TARGET_SSE"
"@
punpcklqdq\t%0, %0
@@ -16838,19 +17040,23 @@
movlhps\t%0, %0"
[(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
(set_attr "type" "sselog1,sselog1,sselog1,ssemov")
- (set_attr "prefix" "orig,vex,maybe_vex,orig")
+ (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
(set_attr "mode" "TI,TI,DF,V4SF")])
(define_insn "avx2_vbroadcasti128_<mode>"
- [(set (match_operand:VI_256 0 "register_operand" "=x")
+ [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
(vec_concat:VI_256
- (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
+ (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
(match_dup 1)))]
"TARGET_AVX2"
- "vbroadcasti128\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssemov")
+ "@
+ vbroadcasti128\t{%1, %0|%0, %1}
+ vbroadcast<i128vldq>\t{%1, %0|%0, %1}
+ vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "*,avx512dq,avx512vl")
+ (set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "vex,evex,evex")
(set_attr "mode" "OI")])
;; Modes handled by AVX vec_dup patterns.
@@ -16927,19 +17133,24 @@
"operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
(define_insn "avx_vbroadcastf128_<mode>"
- [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
+ [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
(vec_concat:V_256
- (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
+ (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
(match_dup 1)))]
"TARGET_AVX"
"@
vbroadcast<i128>\t{%1, %0|%0, %1}
vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
- vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
- [(set_attr "type" "ssemov,sselog1,sselog1")
+ vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
+ vbroadcast<i128vldq>\t{%1, %0|%0, %1}
+ vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
+ vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
+ vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
+ [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
+ (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
(set_attr "prefix_extra" "1")
- (set_attr "length_immediate" "0,1,1")
- (set_attr "prefix" "vex")
+ (set_attr "length_immediate" "0,1,1,0,1,0,1")
+ (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
(set_attr "mode" "<sseinsnmode>")])
;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
@@ -16961,7 +17172,7 @@
(match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_AVX512DQ"
- "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "evex")
@@ -17184,11 +17395,11 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
- [(match_operand:VI48F 0 "register_operand" "=v")
- (match_operand:VI48F 1 "register_operand" "v")
- (match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI48F 3 "nonimmediate_operand" "vm")
- (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
+ [(match_operand:VI48F 0 "register_operand")
+ (match_operand:VI48F 1 "register_operand")
+ (match_operand:<sseintvecmode> 2 "register_operand")
+ (match_operand:VI48F 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512F"
{
emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
@@ -17212,11 +17423,11 @@
})
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
- [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
- (match_operand:VI2_AVX512VL 1 "register_operand" "v")
- (match_operand:<sseintvecmode> 2 "register_operand" "0")
- (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
- (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
+ [(match_operand:VI2_AVX512VL 0 "register_operand")
+ (match_operand:VI2_AVX512VL 1 "register_operand")
+ (match_operand:<sseintvecmode> 2 "register_operand")
+ (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512BW"
{
emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
@@ -17313,11 +17524,11 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "<avx512>_vpermt2var<mode>3_maskz"
- [(match_operand:VI48F 0 "register_operand" "=v")
- (match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI48F 2 "register_operand" "0")
- (match_operand:VI48F 3 "nonimmediate_operand" "vm")
- (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
+ [(match_operand:VI48F 0 "register_operand")
+ (match_operand:<sseintvecmode> 1 "register_operand")
+ (match_operand:VI48F 2 "register_operand")
+ (match_operand:VI48F 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512F"
{
emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
@@ -17327,11 +17538,11 @@
})
(define_expand "<avx512>_vpermt2var<mode>3_maskz"
- [(match_operand:VI1_AVX512VL 0 "register_operand" "=v")
- (match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI1_AVX512VL 2 "register_operand" "0")
- (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")
- (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
+ [(match_operand:VI1_AVX512VL 0 "register_operand")
+ (match_operand:<sseintvecmode> 1 "register_operand")
+ (match_operand:VI1_AVX512VL 2 "register_operand")
+ (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512VBMI"
{
emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
@@ -17341,11 +17552,11 @@
})
(define_expand "<avx512>_vpermt2var<mode>3_maskz"
- [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
- (match_operand:<sseintvecmode> 1 "register_operand" "v")
- (match_operand:VI2_AVX512VL 2 "register_operand" "0")
- (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
- (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
+ [(match_operand:VI2_AVX512VL 0 "register_operand")
+ (match_operand:<sseintvecmode> 1 "register_operand")
+ (match_operand:VI2_AVX512VL 2 "register_operand")
+ (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX512BW"
{
emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
@@ -17607,10 +17818,12 @@
(vec_select:<ssehalfvecmode>
(match_operand:VI8F_256 1 "register_operand" "v")
(parallel [(const_int 2) (const_int 3)]))))]
- "TARGET_AVX"
+ "TARGET_AVX && <mask_avx512dq_condition>"
{
- if (TARGET_AVX512VL)
+ if (TARGET_AVX512DQ)
return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
+ else if (TARGET_AVX512VL)
+ return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
else
return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
}
@@ -17627,10 +17840,12 @@
(match_operand:VI8F_256 1 "register_operand" "v")
(parallel [(const_int 0) (const_int 1)]))
(match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
- "TARGET_AVX"
+ "TARGET_AVX && <mask_avx512dq_condition>"
{
- if (TARGET_AVX512VL)
+ if (TARGET_AVX512DQ)
return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
+ else if (TARGET_AVX512VL)
+ return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
else
return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
}
@@ -17683,47 +17898,51 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "vec_set_lo_v16hi"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+ [(set (match_operand:V16HI 0 "register_operand" "=x,v")
(vec_concat:V16HI
- (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
(vec_select:V8HI
- (match_operand:V16HI 1 "register_operand" "x")
+ (match_operand:V16HI 1 "register_operand" "x,v")
(parallel [(const_int 8) (const_int 9)
(const_int 10) (const_int 11)
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)]))))]
"TARGET_AVX"
- "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ "@
+ vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
+ vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "vex,evex")
(set_attr "mode" "OI")])
(define_insn "vec_set_hi_v16hi"
- [(set (match_operand:V16HI 0 "register_operand" "=x")
+ [(set (match_operand:V16HI 0 "register_operand" "=x,v")
(vec_concat:V16HI
(vec_select:V8HI
- (match_operand:V16HI 1 "register_operand" "x")
+ (match_operand:V16HI 1 "register_operand" "x,v")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))
- (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
"TARGET_AVX"
- "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ "@
+ vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
+ vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "vex,evex")
(set_attr "mode" "OI")])
(define_insn "vec_set_lo_v32qi"
- [(set (match_operand:V32QI 0 "register_operand" "=x")
+ [(set (match_operand:V32QI 0 "register_operand" "=x,v")
(vec_concat:V32QI
- (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
(vec_select:V16QI
- (match_operand:V32QI 1 "register_operand" "x")
+ (match_operand:V32QI 1 "register_operand" "x,v")
(parallel [(const_int 16) (const_int 17)
(const_int 18) (const_int 19)
(const_int 20) (const_int 21)
@@ -17733,18 +17952,20 @@
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)]))))]
"TARGET_AVX"
- "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ "@
+ vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
+ vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "vex,evex")
(set_attr "mode" "OI")])
(define_insn "vec_set_hi_v32qi"
- [(set (match_operand:V32QI 0 "register_operand" "=x")
+ [(set (match_operand:V32QI 0 "register_operand" "=x,v")
(vec_concat:V32QI
(vec_select:V16QI
- (match_operand:V32QI 1 "register_operand" "x")
+ (match_operand:V32QI 1 "register_operand" "x,v")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
@@ -17753,13 +17974,15 @@
(const_int 10) (const_int 11)
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)]))
- (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
"TARGET_AVX"
- "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ "@
+ vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
+ vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "vex,evex")
(set_attr "mode" "OI")])
(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
@@ -18375,7 +18598,11 @@
UNSPEC_GATHER))
(clobber (match_scratch:QI 2 "=&Yk"))]
"TARGET_AVX512F"
- "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
+{
+ if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %t6}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}";
+}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -18475,7 +18702,11 @@
UNSPEC_SCATTER))
(clobber (match_scratch:QI 1 "=&Yk"))]
"TARGET_AVX512F"
- "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
+{
+ if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
+ return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}";
+ return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}";
+}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])