diff options
Diffstat (limited to 'gcc/config/i386/sse.md')
-rw-r--r-- | gcc/config/i386/sse.md | 1081 |
1 files changed, 656 insertions, 425 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 42d553cfdaa..745b6b665f5 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -778,6 +778,12 @@ (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128") (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")]) +;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ +;; i32x4, f32x4, i64x2 or f64x2 suffixes. +(define_mode_attr i128vldq + [(V8SF "f32x4") (V4DF "f64x2") + (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")]) + ;; Mix-n-match (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF]) @@ -1076,10 +1082,10 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "sse2_movq128" - [(set (match_operand:V2DI 0 "register_operand" "=x") + [(set (match_operand:V2DI 0 "register_operand" "=v") (vec_concat:V2DI (vec_select:DI - (match_operand:V2DI 1 "nonimmediate_operand" "xm") + (match_operand:V2DI 1 "nonimmediate_operand" "vm") (parallel [(const_int 0)])) (const_int 0)))] "TARGET_SSE2" @@ -1485,7 +1491,7 @@ (match_operand:VF_128 2 "register_operand" "v") (const_int 1)))] "TARGET_AVX512F" - "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" + "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) @@ -1583,7 +1589,7 @@ (match_operand:VF_128 2 "register_operand" "v") (const_int 1)))] "TARGET_AVX512F" - "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" + "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) @@ -2783,54 +2789,61 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "<sse>_andnot<mode>3<mask_name>" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,v") + [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v") (and:VF_128_256 (not:VF_128_256 - (match_operand:VF_128_256 
1 "register_operand" "0,v")) - (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))] + (match_operand:VF_128_256 1 "register_operand" "0,x,v,v")) + (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))] "TARGET_SSE && <mask_avx512vl_condition>" { static char buf[128]; const char *ops; const char *suffix; - switch (get_attr_mode (insn)) - { - case MODE_V8SF: - case MODE_V4SF: - suffix = "ps"; - break; - default: - suffix = "<ssemodesuffix>"; - } - switch (which_alternative) { case 0: ops = "andn%s\t{%%2, %%0|%%0, %%2}"; break; case 1: + case 2: + case 3: ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; break; default: gcc_unreachable (); } - /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */ - if (<mask_applied> && !TARGET_AVX512DQ) + switch (get_attr_mode (insn)) { + case MODE_V8SF: + case MODE_V4SF: + suffix = "ps"; + break; + case MODE_OI: + case MODE_TI: + /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */ suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? 
"q" : "d"; ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; + break; + default: + suffix = "<ssemodesuffix>"; } snprintf (buf, sizeof (buf), ops, suffix); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512dq,avx512f") (set_attr "type" "sselog") - (set_attr "prefix" "orig,maybe_evex") + (set_attr "prefix" "orig,maybe_vex,evex,evex") (set (attr "mode") - (cond [(and (match_test "<MODE_SIZE> == 16") + (cond [(and (match_test "<mask_applied>") + (and (eq_attr "alternative" "1") + (match_test "!TARGET_AVX512DQ"))) + (const_string "<sseintvecmode2>") + (eq_attr "alternative" "3") + (const_string "<sseintvecmode2>") + (and (match_test "<MODE_SIZE> == 16") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "<ssePSmode>") (match_test "TARGET_AVX") @@ -2870,7 +2883,10 @@ } [(set_attr "type" "sselog") (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512DQ") + (const_string "<sseinsnmode>") + (const_string "XI")))]) (define_expand "<code><mode>3<mask_name>" [(set (match_operand:VF_128_256 0 "register_operand") @@ -2889,10 +2905,10 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") (define_insn "*<code><mode>3<mask_name>" - [(set (match_operand:VF_128_256 0 "register_operand" "=x,v") + [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v") (any_logic:VF_128_256 - (match_operand:VF_128_256 1 "vector_operand" "%0,v") - (match_operand:VF_128_256 2 "vector_operand" "xBm,vm")))] + (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v") + (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))] "TARGET_SSE && <mask_avx512vl_condition> && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" { @@ -2900,43 +2916,50 @@ const char *ops; const char *suffix; - switch (get_attr_mode (insn)) - { - case MODE_V8SF: - case MODE_V4SF: - suffix = "ps"; - break; - default: - suffix = 
"<ssemodesuffix>"; - } - switch (which_alternative) { case 0: ops = "<logic>%s\t{%%2, %%0|%%0, %%2}"; break; case 1: + case 2: + case 3: ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; break; default: gcc_unreachable (); } - /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */ - if (<mask_applied> && !TARGET_AVX512DQ) + switch (get_attr_mode (insn)) { + case MODE_V8SF: + case MODE_V4SF: + suffix = "ps"; + break; + case MODE_OI: + case MODE_TI: + /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */ suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d"; ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; + break; + default: + suffix = "<ssemodesuffix>"; } snprintf (buf, sizeof (buf), ops, suffix); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512dq,avx512f") (set_attr "type" "sselog") - (set_attr "prefix" "orig,maybe_evex") + (set_attr "prefix" "orig,maybe_evex,evex,evex") (set (attr "mode") - (cond [(and (match_test "<MODE_SIZE> == 16") + (cond [(and (match_test "<mask_applied>") + (and (eq_attr "alternative" "1") + (match_test "!TARGET_AVX512DQ"))) + (const_string "<sseintvecmode2>") + (eq_attr "alternative" "3") + (const_string "<sseintvecmode2>") + (and (match_test "<MODE_SIZE> == 16") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "<ssePSmode>") (match_test "TARGET_AVX") @@ -2961,7 +2984,7 @@ ops = ""; /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */ - if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ) + if (!TARGET_AVX512DQ) { suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? 
"q" : "d"; ops = "p"; @@ -2974,7 +2997,10 @@ } [(set_attr "type" "sselog") (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512DQ") + (const_string "<sseinsnmode>") + (const_string "XI")))]) (define_expand "copysign<mode>3" [(set (match_dup 4) @@ -3000,14 +3026,14 @@ ;; because the native instructions read the full 128-bits. (define_insn "*andnot<mode>3" - [(set (match_operand:MODEF 0 "register_operand" "=x,x") + [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v") (and:MODEF (not:MODEF - (match_operand:MODEF 1 "register_operand" "0,x")) - (match_operand:MODEF 2 "register_operand" "x,x")))] + (match_operand:MODEF 1 "register_operand" "0,x,v,v")) + (match_operand:MODEF 2 "register_operand" "x,x,v,v")))] "SSE_FLOAT_MODE_P (<MODE>mode)" { - static char buf[32]; + static char buf[128]; const char *ops; const char *suffix = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>"; @@ -3020,6 +3046,24 @@ case 1: ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; + case 2: + if (TARGET_AVX512DQ) + ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + else + { + suffix = <MODE>mode == DFmode ? "q" : "d"; + ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + } + break; + case 3: + if (TARGET_AVX512DQ) + ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; + else + { + suffix = <MODE>mode == DFmode ? 
"q" : "d"; + ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; + } + break; default: gcc_unreachable (); } @@ -3027,11 +3071,19 @@ snprintf (buf, sizeof (buf), ops, suffix); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,vex,evex,evex") (set (attr "mode") - (cond [(and (match_test "<MODE_SIZE> == 16") + (cond [(eq_attr "alternative" "2") + (if_then_else (match_test "TARGET_AVX512DQ") + (const_string "<ssevecmode>") + (const_string "TI")) + (eq_attr "alternative" "3") + (if_then_else (match_test "TARGET_AVX512DQ") + (const_string "<avx512fvecmode>") + (const_string "XI")) + (and (match_test "<MODE_SIZE> == 16") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") (match_test "TARGET_AVX") @@ -3042,16 +3094,17 @@ (const_string "<ssevecmode>")))]) (define_insn "*andnottf3" - [(set (match_operand:TF 0 "register_operand" "=x,x") + [(set (match_operand:TF 0 "register_operand" "=x,x,v,v") (and:TF - (not:TF (match_operand:TF 1 "register_operand" "0,x")) - (match_operand:TF 2 "vector_operand" "xBm,xm")))] + (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v")) + (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))] "TARGET_SSE" { - static char buf[32]; + static char buf[128]; const char *ops; const char *tmp - = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn"; + = (which_alternative >= 2 ? "pandnq" + : get_attr_mode (insn) == MODE_V4SF ? 
"andnps" : "pandn"); switch (which_alternative) { @@ -3059,8 +3112,12 @@ ops = "%s\t{%%2, %%0|%%0, %%2}"; break; case 1: + case 2: ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; + case 3: + ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; + break; default: gcc_unreachable (); } @@ -3068,7 +3125,7 @@ snprintf (buf, sizeof (buf), ops, tmp); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -3076,9 +3133,13 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,vex,evex,evex") (set (attr "mode") - (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (cond [(eq_attr "alternative" "2") + (const_string "TI") + (eq_attr "alternative" "3") + (const_string "XI") + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "V4SF") (match_test "TARGET_AVX") (const_string "TI") @@ -3089,13 +3150,13 @@ (const_string "TI")))]) (define_insn "*<code><mode>3" - [(set (match_operand:MODEF 0 "register_operand" "=x,x") + [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v") (any_logic:MODEF - (match_operand:MODEF 1 "register_operand" "%0,x") - (match_operand:MODEF 2 "register_operand" "x,x")))] + (match_operand:MODEF 1 "register_operand" "%0,x,v,v") + (match_operand:MODEF 2 "register_operand" "x,x,v,v")))] "SSE_FLOAT_MODE_P (<MODE>mode)" { - static char buf[32]; + static char buf[128]; const char *ops; const char *suffix = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>"; @@ -3105,9 +3166,26 @@ case 0: ops = "<logic>%s\t{%%2, %%0|%%0, %%2}"; break; + case 2: + if (!TARGET_AVX512DQ) + { + suffix = <MODE>mode == DFmode ? 
"q" : "d"; + ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + break; + } + /* FALLTHRU */ case 1: ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; + case 3: + if (TARGET_AVX512DQ) + ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; + else + { + suffix = <MODE>mode == DFmode ? "q" : "d"; + ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; + } + break; default: gcc_unreachable (); } @@ -3115,11 +3193,19 @@ snprintf (buf, sizeof (buf), ops, suffix); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,vex,evex,evex") (set (attr "mode") - (cond [(and (match_test "<MODE_SIZE> == 16") + (cond [(eq_attr "alternative" "2") + (if_then_else (match_test "TARGET_AVX512DQ") + (const_string "<ssevecmode>") + (const_string "TI")) + (eq_attr "alternative" "3") + (if_then_else (match_test "TARGET_AVX512DQ") + (const_string "<avx512fvecmode>") + (const_string "XI")) + (and (match_test "<MODE_SIZE> == 16") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") (match_test "TARGET_AVX") @@ -3138,17 +3224,18 @@ "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);") (define_insn "*<code>tf3" - [(set (match_operand:TF 0 "register_operand" "=x,x") + [(set (match_operand:TF 0 "register_operand" "=x,x,v,v") (any_logic:TF - (match_operand:TF 1 "vector_operand" "%0,x") - (match_operand:TF 2 "vector_operand" "xBm,xm")))] + (match_operand:TF 1 "vector_operand" "%0,x,v,v") + (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))] "TARGET_SSE && ix86_binary_operator_ok (<CODE>, TFmode, operands)" { - static char buf[32]; + static char buf[128]; const char *ops; const char *tmp - = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>"; + = (which_alternative >= 2 ? "p<logic>q" + : get_attr_mode (insn) == MODE_V4SF ? 
"<logic>ps" : "p<logic>"); switch (which_alternative) { @@ -3156,8 +3243,12 @@ ops = "%s\t{%%2, %%0|%%0, %%2}"; break; case 1: + case 2: ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; + case 3: + ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}"; + break; default: gcc_unreachable (); } @@ -3165,7 +3256,7 @@ snprintf (buf, sizeof (buf), ops, tmp); return buf; } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512vl,avx512f") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -3173,9 +3264,13 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,vex,evex,evex") (set (attr "mode") - (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (cond [(eq_attr "alternative" "2") + (const_string "TI") + (eq_attr "alternative" "3") + (const_string "QI") + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") (const_string "V4SF") (match_test "TARGET_AVX") (const_string "TI") @@ -3327,10 +3422,10 @@ (set_attr "mode" "<MODE>")]) (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>" - [(set (match_operand:VF_AVX512VL 0 "register_operand" "=x") + [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") (vec_merge:VF_AVX512VL (fma:VF_AVX512VL - (match_operand:VF_AVX512VL 1 "register_operand" "x") + (match_operand:VF_AVX512VL 1 "register_operand" "v") (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>") (match_operand:VF_AVX512VL 3 "register_operand" "0")) (match_dup 3) @@ -4187,7 +4282,7 @@ (parallel [(const_int 0) (const_int 1)]))] UNSPEC_FIX_NOTRUNC))] "TARGET_AVX512DQ && TARGET_AVX512VL" - "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) @@ -4210,7 +4305,7 @@ (parallel [(const_int 0) (const_int 1)]))] UNSPEC_UNSIGNED_FIX_NOTRUNC))] "TARGET_AVX512DQ && TARGET_AVX512VL" - "vcvtps2uqq\t{%1, 
%0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) @@ -4735,9 +4830,9 @@ "operands[2] = CONST0_RTX (V4SImode);") (define_insn "*avx_cvtpd2dq256_2" - [(set (match_operand:V8SI 0 "register_operand" "=x") + [(set (match_operand:V8SI 0 "register_operand" "=v") (vec_concat:V8SI - (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")] + (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")] UNSPEC_FIX_NOTRUNC) (match_operand:V4SI 2 "const0_operand")))] "TARGET_AVX" @@ -4885,7 +4980,7 @@ (match_operand:V4SF 1 "nonimmediate_operand" "vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_AVX512DQ && TARGET_AVX512VL" - "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "TI")]) @@ -4949,6 +5044,27 @@ (set_attr "prefix" "orig,orig,<round_prefix>") (set_attr "mode" "SF")]) +(define_insn "*sse2_vd_cvtsd2ss" + [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") + (vec_merge:V4SF + (vec_duplicate:V4SF + (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm"))) + (match_operand:V4SF 1 "register_operand" "0,0,v") + (const_int 1)))] + "TARGET_SSE2" + "@ + cvtsd2ss\t{%2, %0|%0, %2} + cvtsd2ss\t{%2, %0|%0, %2} + vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssecvt") + (set_attr "athlon_decode" "vector,double,*") + (set_attr "amdfam10_decode" "vector,double,*") + (set_attr "bdver1_decode" "direct,direct,*") + (set_attr "btver2_decode" "double,double,double") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "SF")]) + (define_insn "sse2_cvtss2sd<round_saeonly_name>" [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") (vec_merge:V2DF @@ -4972,6 +5088,27 @@ (set_attr "prefix" 
"orig,orig,<round_saeonly_prefix>") (set_attr "mode" "DF")]) +(define_insn "*sse2_vd_cvtss2sd" + [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") + (vec_merge:V2DF + (vec_duplicate:V2DF + (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm"))) + (match_operand:V2DF 1 "register_operand" "0,0,v") + (const_int 1)))] + "TARGET_SSE2" + "@ + cvtss2sd\t{%2, %0|%0, %2} + cvtss2sd\t{%2, %0|%0, %2} + vcvtss2sd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "ssecvt") + (set_attr "amdfam10_decode" "vector,double,*") + (set_attr "athlon_decode" "direct,direct,*") + (set_attr "bdver1_decode" "direct,direct,*") + (set_attr "btver2_decode" "double,double,double") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "DF")]) + (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>" [(set (match_operand:V8SF 0 "register_operand" "=v") (float_truncate:V8SF @@ -5050,10 +5187,10 @@ (set_attr "mode" "<MODE>")]) (define_insn "*avx_cvtps2pd256_2" - [(set (match_operand:V4DF 0 "register_operand" "=x") + [(set (match_operand:V4DF 0 "register_operand" "=v") (float_extend:V4DF (vec_select:V4SF - (match_operand:V8SF 1 "nonimmediate_operand" "xm") + (match_operand:V8SF 1 "nonimmediate_operand" "vm") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))] "TARGET_AVX" @@ -5744,11 +5881,11 @@ }) (define_insn "sse_movhlps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m") + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m") (vec_select:V4SF (vec_concat:V8SF - (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") - (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x")) + (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0") + (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v")) (parallel [(const_int 6) (const_int 7) (const_int 2) @@ -5762,7 +5899,7 @@ %vmovhps\t{%2, %0|%q0, %2}" [(set_attr "isa" "noavx,avx,noavx,avx,*") (set_attr "type" "ssemov") - 
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) (define_expand "sse_movlhps_exp" @@ -5789,11 +5926,11 @@ }) (define_insn "sse_movlhps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o") + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o") (vec_select:V4SF (vec_concat:V8SF - (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") - (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x")) + (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0") + (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,m,v")) (parallel [(const_int 0) (const_int 1) (const_int 4) @@ -5807,7 +5944,7 @@ %vmovlps\t{%2, %H0|%H0, %2}" [(set_attr "isa" "noavx,avx,noavx,avx,*") (set_attr "type" "ssemov") - (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") + (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>" @@ -5851,8 +5988,8 @@ [(set (match_dup 3) (vec_select:V8SF (vec_concat:V16SF - (match_operand:V8SF 1 "register_operand" "x") - (match_operand:V8SF 2 "nonimmediate_operand" "xm")) + (match_operand:V8SF 1 "register_operand") + (match_operand:V8SF 2 "nonimmediate_operand")) (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9) (const_int 4) (const_int 12) @@ -5956,8 +6093,8 @@ [(set (match_dup 3) (vec_select:V8SF (vec_concat:V16SF - (match_operand:V8SF 1 "register_operand" "x") - (match_operand:V8SF 2 "nonimmediate_operand" "xm")) + (match_operand:V8SF 1 "register_operand") + (match_operand:V8SF 2 "nonimmediate_operand")) (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9) (const_int 4) (const_int 12) @@ -5987,11 +6124,11 @@ }) (define_insn "vec_interleave_lowv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") + [(set (match_operand:V4SF 0 "register_operand" "=x,v") (vec_select:V4SF 
(vec_concat:V8SF - (match_operand:V4SF 1 "register_operand" "0,x") - (match_operand:V4SF 2 "vector_operand" "xBm,xm")) + (match_operand:V4SF 1 "register_operand" "0,v") + (match_operand:V4SF 2 "vector_operand" "xBm,vm")) (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] "TARGET_SSE" @@ -6000,7 +6137,7 @@ vunpcklps\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "V4SF")]) ;; These are modeled with the same vec_concat as the others so that we @@ -6219,11 +6356,11 @@ (set_attr "mode" "V4SF")]) (define_insn "sse_shufps_<mode>" - [(set (match_operand:VI4F_128 0 "register_operand" "=x,x") + [(set (match_operand:VI4F_128 0 "register_operand" "=x,v") (vec_select:VI4F_128 (vec_concat:<ssedoublevecmode> - (match_operand:VI4F_128 1 "register_operand" "0,x") - (match_operand:VI4F_128 2 "vector_operand" "xBm,xm")) + (match_operand:VI4F_128 1 "register_operand" "0,v") + (match_operand:VI4F_128 2 "vector_operand" "xBm,vm")) (parallel [(match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") (match_operand 5 "const_4_to_7_operand") @@ -6250,13 +6387,13 @@ [(set_attr "isa" "noavx,avx") (set_attr "type" "sseshuf") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "V4SF")]) (define_insn "sse_storehps" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v") (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o") + (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o") (parallel [(const_int 2) (const_int 3)])))] "TARGET_SSE" "@ @@ -6288,12 +6425,12 @@ }) (define_insn "sse_loadhps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o") + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o") (vec_concat:V4SF (vec_select:V2SF - 
(match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") + (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0") (parallel [(const_int 0) (const_int 1)])) - (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))] + (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))] "TARGET_SSE" "@ movhps\t{%2, %0|%0, %q2} @@ -6303,13 +6440,13 @@ %vmovlps\t{%2, %H0|%H0, %2}" [(set_attr "isa" "noavx,avx,noavx,avx,*") (set_attr "type" "ssemov") - (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") + (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")]) (define_insn "sse_storelps" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v") (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m") + (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m") (parallel [(const_int 0) (const_int 1)])))] "TARGET_SSE" "@ @@ -6341,11 +6478,11 @@ }) (define_insn "sse_loadlps" - [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m") + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m") (vec_concat:V4SF - (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x") + (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v") (vec_select:V2SF - (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0") + (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0") (parallel [(const_int 2) (const_int 3)]))))] "TARGET_SSE" "@ @@ -6357,14 +6494,14 @@ [(set_attr "isa" "noavx,avx,noavx,avx,*") (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov") (set_attr "length_immediate" "1,1,*,*,*") - (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") + (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) (define_insn "sse_movss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x") + [(set (match_operand:V4SF 0 "register_operand" "=x,v") (vec_merge:V4SF - (match_operand:V4SF 2 
"register_operand" " x,x") - (match_operand:V4SF 1 "register_operand" " 0,x") + (match_operand:V4SF 2 "register_operand" " x,v") + (match_operand:V4SF 1 "register_operand" " 0,v") (const_int 1)))] "TARGET_SSE" "@ @@ -6372,31 +6509,31 @@ vmovss\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,avx") (set_attr "type" "ssemov") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "SF")]) (define_insn "avx2_vec_dup<mode>" - [(set (match_operand:VF1_128_256 0 "register_operand" "=x") + [(set (match_operand:VF1_128_256 0 "register_operand" "=v") (vec_duplicate:VF1_128_256 (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") + (match_operand:V4SF 1 "register_operand" "v") (parallel [(const_int 0)]))))] "TARGET_AVX2" "vbroadcastss\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") - (set_attr "prefix" "vex") + (set_attr "prefix" "maybe_evex") (set_attr "mode" "<MODE>")]) (define_insn "avx2_vec_dupv8sf_1" - [(set (match_operand:V8SF 0 "register_operand" "=x") + [(set (match_operand:V8SF 0 "register_operand" "=v") (vec_duplicate:V8SF (vec_select:SF - (match_operand:V8SF 1 "register_operand" "x") + (match_operand:V8SF 1 "register_operand" "v") (parallel [(const_int 0)]))))] "TARGET_AVX2" "vbroadcastss\t{%x1, %0|%0, %x1}" [(set_attr "type" "sselog1") - (set_attr "prefix" "vex") + (set_attr "prefix" "maybe_evex") (set_attr "mode" "V8SF")]) (define_insn "avx512f_vec_dup<mode>_1" @@ -6415,12 +6552,12 @@ ;; unpcklps with register source since it is shorter. 
(define_insn "*vec_concatv2sf_sse4_1" [(set (match_operand:V2SF 0 "register_operand" - "=Yr,*x,x,Yr,*x,x,x,*y ,*y") + "=Yr,*x, v,Yr,*x,v,v,*y ,*y") (vec_concat:V2SF (match_operand:SF 1 "nonimmediate_operand" - " 0, 0,x, 0,0, x,m, 0 , m") + " 0, 0,Yv, 0,0, v,m, 0 , m") (match_operand:SF 2 "vector_move_operand" - " Yr,*x,x, m,m, m,C,*ym, C")))] + " Yr,*x,Yv, m,m, m,C,*ym, C")))] "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ unpcklps\t{%2, %0|%0, %2} @@ -6437,7 +6574,7 @@ (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*") (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*") (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*") - (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig") + (set_attr "prefix" "orig,orig,maybe_evex,orig,orig,maybe_evex,maybe_vex,orig,orig") (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")]) ;; ??? In theory we can match memory for the MMX alternative, but allowing @@ -6458,10 +6595,10 @@ (set_attr "mode" "V4SF,SF,DI,DI")]) (define_insn "*vec_concatv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x") + [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v") (vec_concat:V4SF - (match_operand:V2SF 1 "register_operand" " 0,x,0,x") - (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))] + (match_operand:V2SF 1 "register_operand" " 0,v,0,v") + (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))] "TARGET_SSE" "@ movlhps\t{%2, %0|%0, %2} @@ -6470,7 +6607,7 @@ vmovhps\t{%2, %1, %0|%0, %1, %q2}" [(set_attr "isa" "noavx,avx,noavx,avx") (set_attr "type" "ssemov") - (set_attr "prefix" "orig,vex,orig,vex") + (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")]) (define_expand "vec_init<mode>" @@ -6526,11 +6663,11 @@ ;; A subset is vec_setv4sf. 
(define_insn "*vec_setv4sf_sse4_1" - [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x") + [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v") (vec_merge:V4SF (vec_duplicate:V4SF - (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm")) - (match_operand:V4SF 1 "register_operand" "0,0,x") + (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm")) + (match_operand:V4SF 1 "register_operand" "0,0,v") (match_operand:SI 3 "const_int_operand")))] "TARGET_SSE4_1 && ((unsigned) exact_log2 (INTVAL (operands[3])) @@ -6553,13 +6690,13 @@ (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,orig,vex") + (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "V4SF")]) (define_insn "sse4_1_insertps" - [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x") - (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm") - (match_operand:V4SF 1 "register_operand" "0,0,x") + [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v") + (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm") + (match_operand:V4SF 1 "register_operand" "0,0,v") (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")] UNSPEC_INSERTPS))] "TARGET_SSE4_1" @@ -6587,7 +6724,7 @@ (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,orig,vex") + (set_attr "prefix" "orig,orig,maybe_evex") (set_attr "mode" "V4SF")]) (define_split @@ -6613,9 +6750,9 @@ }) (define_insn_and_split "*vec_extractv4sf_0" - [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r") + [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r") (vec_select:SF - (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m") + (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m") (parallel [(const_int 0)])))] "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" @@ -6624,9 +6761,9 @@ "operands[1] = gen_lowpart (SFmode, operands[1]);") 
(define_insn_and_split "*sse4_1_extractps" - [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x") + [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,v,v") (vec_select:SF - (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x") + (match_operand:V4SF 1 "register_operand" "Yr,*v,0,v") (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))] "TARGET_SSE4_1" "@ @@ -6665,7 +6802,7 @@ (set_attr "mode" "V4SF,V4SF,*,*")]) (define_insn_and_split "*vec_extractv4sf_mem" - [(set (match_operand:SF 0 "register_operand" "=x,*r,f") + [(set (match_operand:SF 0 "register_operand" "=v,*r,f") (vec_select:SF (match_operand:V4SF 1 "memory_operand" "o,o,o") (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))] @@ -7239,9 +7376,9 @@ (set_attr "mode" "XI")]) (define_insn_and_split "vec_extract_lo_v16hi" - [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") + [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m") (vec_select:V8HI - (match_operand:V16HI 1 "nonimmediate_operand" "xm,x") + (match_operand:V16HI 1 "nonimmediate_operand" "vm,v") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3) (const_int 4) (const_int 5) @@ -7253,20 +7390,27 @@ "operands[1] = gen_lowpart (V8HImode, operands[1]);") (define_insn "vec_extract_hi_v16hi" - [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") + [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m,v,m,v,m") (vec_select:V8HI - (match_operand:V16HI 1 "register_operand" "x,x") + (match_operand:V16HI 1 "register_operand" "x,x,v,v,v,v") (parallel [(const_int 8) (const_int 9) (const_int 10) (const_int 11) (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))] "TARGET_AVX" - "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" + "@ + vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1} + vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1} + vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1} + vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1} + vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1} + vextracti32x4\t{$0x1, %g1, 
%0|%0, %g1, 0x1}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "memory" "none,store") - (set_attr "prefix" "vex") + (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f") + (set_attr "memory" "none,store,none,store,none,store") + (set_attr "prefix" "vex,vex,evex,evex,evex,evex") (set_attr "mode" "OI")]) (define_insn_and_split "vec_extract_lo_v64qi" @@ -7325,9 +7469,9 @@ (set_attr "mode" "XI")]) (define_insn_and_split "vec_extract_lo_v32qi" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m") (vec_select:V16QI - (match_operand:V32QI 1 "nonimmediate_operand" "xm,x") + (match_operand:V32QI 1 "nonimmediate_operand" "vm,v") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3) (const_int 4) (const_int 5) @@ -7343,9 +7487,9 @@ "operands[1] = gen_lowpart (V16QImode, operands[1]);") (define_insn "vec_extract_hi_v32qi" - [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") + [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m,v,m,v,m") (vec_select:V16QI - (match_operand:V32QI 1 "register_operand" "x,x") + (match_operand:V32QI 1 "register_operand" "x,x,v,v,v,v") (parallel [(const_int 16) (const_int 17) (const_int 18) (const_int 19) (const_int 20) (const_int 21) @@ -7355,12 +7499,19 @@ (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))] "TARGET_AVX" - "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" + "@ + vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1} + vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1} + vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1} + vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1} + vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1} + vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "memory" "none,store") - (set_attr "prefix" "vex") + (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f") + (set_attr "memory" 
"none,store,none,store,none,store") + (set_attr "prefix" "vex,vex,evex,evex,evex,evex") (set_attr "mode" "OI")]) ;; Modes handled by vec_extract patterns. @@ -7424,8 +7575,8 @@ [(set (match_dup 3) (vec_select:V4DF (vec_concat:V8DF - (match_operand:V4DF 1 "register_operand" "x") - (match_operand:V4DF 2 "nonimmediate_operand" "xm")) + (match_operand:V4DF 1 "register_operand") + (match_operand:V4DF 2 "nonimmediate_operand")) (parallel [(const_int 0) (const_int 4) (const_int 2) (const_int 6)]))) (set (match_dup 4) @@ -7480,11 +7631,11 @@ }) (define_insn "*vec_interleave_highv2df" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m") + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m") (vec_select:V2DF (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x") - (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0")) + (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v") + (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0")) (parallel [(const_int 1) (const_int 3)])))] "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" @@ -7498,7 +7649,7 @@ [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*") (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") (set_attr "prefix_data16" "*,*,*,1,*,1") - (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") + (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) (define_expand "avx512f_movddup512<mask_name>" @@ -7584,8 +7735,8 @@ [(set (match_dup 3) (vec_select:V4DF (vec_concat:V8DF - (match_operand:V4DF 1 "register_operand" "x") - (match_operand:V4DF 2 "nonimmediate_operand" "xm")) + (match_operand:V4DF 1 "register_operand") + (match_operand:V4DF 2 "nonimmediate_operand")) (parallel [(const_int 0) (const_int 4) (const_int 2) (const_int 6)]))) (set (match_dup 4) @@ -7639,11 +7790,11 @@ }) (define_insn "*vec_interleave_lowv2df" - [(set (match_operand:V2DF 0 
"nonimmediate_operand" "=x,x,x,x,x,o") + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o") (vec_select:V2DF (vec_concat:V4DF - (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0") - (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x")) + (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0") + (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v")) (parallel [(const_int 0) (const_int 2)])))] "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" @@ -7657,7 +7808,7 @@ [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*") (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") (set_attr "prefix_data16" "*,*,*,1,*,1") - (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") + (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex") (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) (define_split @@ -8248,11 +8399,11 @@ (set_attr "mode" "TI")]) (define_insn "sse2_shufpd_<mode>" - [(set (match_operand:VI8F_128 0 "register_operand" "=x,x") + [(set (match_operand:VI8F_128 0 "register_operand" "=x,v") (vec_select:VI8F_128 (vec_concat:<ssedoublevecmode> - (match_operand:VI8F_128 1 "register_operand" "0,x") - (match_operand:VI8F_128 2 "vector_operand" "xBm,xm")) + (match_operand:VI8F_128 1 "register_operand" "0,v") + (match_operand:VI8F_128 2 "vector_operand" "xBm,vm")) (parallel [(match_operand 3 "const_0_to_1_operand") (match_operand 4 "const_2_to_3_operand")])))] "TARGET_SSE2" @@ -8275,15 +8426,15 @@ [(set_attr "isa" "noavx,avx") (set_attr "type" "sseshuf") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "V2DF")]) ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "sse2_storehpd" - [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r") + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r") 
(vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o") + (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o") (parallel [(const_int 1)])))] "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ @@ -8301,7 +8452,7 @@ (not (match_test "TARGET_AVX"))) (const_string "1") (const_string "*"))) - (set_attr "prefix" "maybe_vex,orig,vex,*,*,*") + (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*") (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")]) (define_split @@ -8332,7 +8483,7 @@ (define_insn "sse2_storelpd" [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r") (vec_select:DF - (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m") + (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m") (parallel [(const_int 0)])))] "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ @@ -8393,14 +8544,14 @@ ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "sse2_loadhpd" [(set (match_operand:V2DF 0 "nonimmediate_operand" - "=x,x,x,x,o,o ,o") + "=x,v,x,v,o,o ,o") (vec_concat:V2DF (vec_select:DF (match_operand:V2DF 1 "nonimmediate_operand" - " 0,x,0,x,0,0 ,0") + " 0,v,0,v,0,0 ,0") (parallel [(const_int 0)])) (match_operand:DF 2 "nonimmediate_operand" - " m,m,x,x,x,*f,r")))] + " m,m,x,v,x,*f,r")))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ movhpd\t{%2, %0|%0, %2} @@ -8413,7 +8564,7 @@ [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov") (set_attr "prefix_data16" "1,*,*,*,*,*,*") - (set_attr "prefix" "orig,vex,orig,vex,*,*,*") + (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*") (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")]) (define_split @@ -8449,13 +8600,13 @@ ;; see comment above inline_secondary_memory_needed function in i386.c (define_insn "sse2_loadlpd" [(set (match_operand:V2DF 0 "nonimmediate_operand" - "=x,x,x,x,x,x,x,x,m,m ,m") + "=v,x,v,x,v,x,x,v,m,m ,m") (vec_concat:V2DF 
(match_operand:DF 2 "nonimmediate_operand" - "xm,m,m,x,x,0,0,x,x,*f,r") + "vm,m,m,x,v,0,0,v,x,*f,r") (vec_select:DF (match_operand:V2DF 1 "vector_move_operand" - " C,0,x,0,x,x,o,o,0,0 ,0") + " C,0,v,0,v,x,o,o,0,0 ,0") (parallel [(const_int 1)]))))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ @@ -8482,7 +8633,7 @@ (const_string "ssemov"))) (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*") (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*") - (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*") + (set_attr "prefix" "maybe_vex,orig,maybe_evex,orig,maybe_evex,orig,orig,maybe_evex,*,*,*") (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")]) (define_split @@ -8495,10 +8646,10 @@ "operands[0] = adjust_address (operands[0], DFmode, 0);") (define_insn "sse2_movsd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o") + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o") (vec_merge:V2DF - (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0") - (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x") + (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0") + (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v") (const_int 1)))] "TARGET_SSE2" "@ @@ -8524,7 +8675,7 @@ (const_string "1") (const_string "*"))) (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*") - (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex") + (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex,orig,orig,maybe_evex,maybe_vex") (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")]) (define_insn "vec_dupv2df<mask_name>" @@ -8799,7 +8950,7 @@ (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))] "TARGET_AVX512VL" - "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}" + "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") @@ -8889,7 
+9040,11 @@ (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))] "TARGET_AVX512VL" - "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}" +{ + if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8) + return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"; + return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}"; +} [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") @@ -8980,7 +9135,11 @@ (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))] "TARGET_AVX512VL" - "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}" +{ + if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4) + return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}"; + return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"; +} [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") @@ -9074,7 +9233,11 @@ (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] "TARGET_AVX512VL" - "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}" +{ + if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4) + return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}"; + return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}"; +} [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") @@ -9149,7 +9312,7 @@ (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] "TARGET_AVX512VL" - "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}" + "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") @@ -9228,7 +9391,7 @@ (match_dup 0) (parallel [(const_int 2) (const_int 3)]))))] "TARGET_AVX512VL" - "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}" + "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") @@ -9331,7 +9494,7 @@ (const_int 12) 
(const_int 13) (const_int 14) (const_int 15)]))))] "TARGET_AVX512F" - "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}" + "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}" [(set_attr "type" "ssemov") (set_attr "memory" "store") (set_attr "prefix" "evex") @@ -9803,19 +9966,19 @@ "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") (define_insn "*avx2_pmaddwd" - [(set (match_operand:V8SI 0 "register_operand" "=x") + [(set (match_operand:V8SI 0 "register_operand" "=x,v") (plus:V8SI (mult:V8SI (sign_extend:V8SI (vec_select:V8HI - (match_operand:V16HI 1 "nonimmediate_operand" "%x") + (match_operand:V16HI 1 "nonimmediate_operand" "%x,v") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))) (sign_extend:V8SI (vec_select:V8HI - (match_operand:V16HI 2 "nonimmediate_operand" "xm") + (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -9836,7 +9999,8 @@ "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)" "vpmaddwd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") - (set_attr "prefix" "vex") + (set_attr "isa" "*,avx512bw") + (set_attr "prefix" "vex,evex") (set_attr "mode" "OI")]) (define_expand "sse2_pmaddwd" @@ -9866,17 +10030,17 @@ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") (define_insn "*sse2_pmaddwd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") + [(set (match_operand:V4SI 0 "register_operand" "=x,x,v") (plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "vector_operand" "%0,x") + (match_operand:V8HI 1 "vector_operand" "%0,x,v") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "vector_operand" "xBm,xm") + (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm") (parallel [(const_int 0) (const_int 2) (const_int 4) 
(const_int 6)])))) (mult:V4SI @@ -9891,12 +10055,13 @@ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" "@ pmaddwd\t{%2, %0|%0, %2} + vpmaddwd\t{%2, %1, %0|%0, %1, %2} vpmaddwd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sseiadd") (set_attr "atom_unit" "simul") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix" "orig,vex,evex") (set_attr "mode" "TI")]) (define_insn "avx512dq_mul<mode>3<mask_name>" @@ -10072,6 +10237,20 @@ DONE; }) +(define_insn "<mask_codefor>ashr<mode>3<mask_name>" + [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v") + (ashiftrt:VI24_AVX512BW_1 + (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm") + (match_operand:SI 2 "nonmemory_operand" "v,N")))] + "TARGET_AVX512VL" + "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + [(set_attr "type" "sseishft") + (set (attr "length_immediate") + (if_then_else (match_operand 2 "const_int_operand") + (const_string "1") + (const_string "0"))) + (set_attr "mode" "<sseinsnmode>")]) + (define_insn "ashr<mode>3" [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") (ashiftrt:VI24_AVX2 @@ -10091,20 +10270,6 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<mask_codefor>ashr<mode>3<mask_name>" - [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v") - (ashiftrt:VI24_AVX512BW_1 - (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm") - (match_operand:SI 2 "nonmemory_operand" "v,N")))] - "TARGET_AVX512VL" - "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "type" "sseishft") - (set (attr "length_immediate") - (if_then_else (match_operand 2 "const_int_operand") - (const_string "1") - (const_string "0"))) - (set_attr "mode" "<sseinsnmode>")]) - (define_insn "<mask_codefor>ashrv2di3<mask_name>" [(set 
(match_operand:V2DI 0 "register_operand" "=v,v") (ashiftrt:V2DI @@ -10442,19 +10607,20 @@ (set_attr "mode" "TI")]) (define_insn "*<code>v8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x,v") (smaxmin:V8HI - (match_operand:V8HI 1 "vector_operand" "%0,x") - (match_operand:V8HI 2 "vector_operand" "xBm,xm")))] + (match_operand:V8HI 1 "vector_operand" "%0,x,v") + (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))] "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)" "@ p<maxmin_int>w\t{%2, %0|%0, %2} + vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2} vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sseiadd") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix_extra" "*,1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_extra" "*,1,1") + (set_attr "prefix" "orig,vex,evex") (set_attr "mode" "TI")]) (define_expand "<code><mode>3" @@ -10526,19 +10692,20 @@ (set_attr "mode" "TI")]) (define_insn "*<code>v16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x,x") + [(set (match_operand:V16QI 0 "register_operand" "=x,x,v") (umaxmin:V16QI - (match_operand:V16QI 1 "vector_operand" "%0,x") - (match_operand:V16QI 2 "vector_operand" "xBm,xm")))] + (match_operand:V16QI 1 "vector_operand" "%0,x,v") + (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))] "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)" "@ p<maxmin_int>b\t{%2, %0|%0, %2} + vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2} vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sseiadd") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix_extra" "*,1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_extra" "*,1,1") + (set_attr "prefix" "orig,vex,evex") (set_attr "mode" 
"TI")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11351,54 +11518,57 @@ }) (define_insn "<sse2_avx2>_packsswb<mask_name>" - [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") (vec_concat:VI1_AVX512 (ss_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 1 "register_operand" "0,v")) + (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v")) (ss_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))] + (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))] "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ packsswb\t{%2, %0|%0, %2} + vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,maybe_evex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix" "orig,<mask_prefix>,evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse2_avx2>_packssdw<mask_name>" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v") (vec_concat:VI2_AVX2 (ss_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 1 "register_operand" "0,v")) + (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v")) (ss_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))] + (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))] "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ packssdw\t{%2, %0|%0, %2} + vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" 
"sselog") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix" "orig,<mask_prefix>,evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse2_avx2>_packuswb<mask_name>" - [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") (vec_concat:VI1_AVX512 (us_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 1 "register_operand" "0,v")) + (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v")) (us_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,vm"))))] + (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))] "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ packuswb\t{%2, %0|%0, %2} + vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix" "orig,<mask_prefix>,evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx512bw_interleave_highv64qi<mask_name>" @@ -11839,9 +12009,9 @@ (set_attr "mode" "TI")]) (define_expand "vec_interleave_high<mode>" - [(match_operand:VI_256 0 "register_operand" "=x") - (match_operand:VI_256 1 "register_operand" "x") - (match_operand:VI_256 2 "nonimmediate_operand" "xm")] + [(match_operand:VI_256 0 "register_operand") + (match_operand:VI_256 1 "register_operand") + (match_operand:VI_256 2 "nonimmediate_operand")] "TARGET_AVX2" { rtx t1 = gen_reg_rtx (<MODE>mode); @@ -11857,9 +12027,9 @@ }) (define_expand "vec_interleave_low<mode>" - [(match_operand:VI_256 0 "register_operand" "=x") - (match_operand:VI_256 1 "register_operand" "x") - (match_operand:VI_256 2 "nonimmediate_operand" "xm")] + [(match_operand:VI_256 0 
"register_operand") + (match_operand:VI_256 1 "register_operand") + (match_operand:VI_256 2 "nonimmediate_operand")] "TARGET_AVX2" { rtx t1 = gen_reg_rtx (<MODE>mode); @@ -11884,13 +12054,17 @@ [(V16QI "sse4_1") (V8HI "sse2") (V4SI "sse4_1") (V2DI "sse4_1")]) +(define_mode_attr pinsr_evex_isa + [(V16QI "avx512bw") (V8HI "avx512bw") + (V4SI "avx512dq") (V2DI "avx512dq")]) + ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred. (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>" - [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x") + [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v") (vec_merge:PINSR_MODE (vec_duplicate:PINSR_MODE - (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m")) - (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x") + (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m")) + (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v") (match_operand:SI 3 "const_int_operand")))] "TARGET_SSE2 && ((unsigned) exact_log2 (INTVAL (operands[3])) @@ -11907,16 +12081,18 @@ case 1: return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"; case 2: + case 4: if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; /* FALLTHRU */ case 3: + case 5: return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } } - [(set_attr "isa" "noavx,noavx,avx,avx") + [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>") (set_attr "type" "sselog") (set (attr "prefix_rex") (if_then_else @@ -11937,7 +12113,7 @@ (const_string "*") (const_string "1"))) (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,orig,vex,vex") + (set_attr "prefix" "orig,orig,vex,vex,evex,evex") (set_attr "mode" "TI")]) (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask" @@ -12071,7 +12247,7 @@ (const_int 2) (const_int 3)])) (match_operand:<ssehalfvecmode> 2 
"nonimmediate_operand" "vm")))] "TARGET_AVX512F" - "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}" + "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}" [(set_attr "type" "sselog") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") @@ -12855,11 +13031,11 @@ "operands[2] = CONST0_RTX (V4SImode);") (define_insn "sse2_loadld" - [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x") + [(set (match_operand:V4SI 0 "register_operand" "=v,Yi,x,x,v") (vec_merge:V4SI (vec_duplicate:V4SI - (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x")) - (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x") + (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v")) + (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v") (const_int 1)))] "TARGET_SSE" "@ @@ -12870,7 +13046,7 @@ vmovss\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "sse2,sse2,noavx,noavx,avx") (set_attr "type" "ssemov") - (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex") + (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex") (set_attr "mode" "TI,TI,V4SF,SF,SF")]) ;; QI and HI modes handled by pextr patterns. 
@@ -12878,39 +13054,44 @@ [(V16QI "TARGET_SSE4_1") V8HI]) (define_insn "*vec_extract<mode>" - [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m") + [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m") (vec_select:<ssescalarmode> - (match_operand:PEXTR_MODE12 1 "register_operand" "x,x") + (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v") (parallel [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))] "TARGET_SSE2" "@ %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2} - %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "*,sse4") + %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} + vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2} + vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,sse4,avx512bw,avx512bw") (set_attr "type" "sselog1") (set_attr "prefix_data16" "1") (set (attr "prefix_extra") (if_then_else - (and (eq_attr "alternative" "0") + (and (eq_attr "alternative" "0,2") (eq (const_string "<MODE>mode") (const_string "V8HImode"))) (const_string "*") (const_string "1"))) (set_attr "length_immediate" "1") - (set_attr "prefix" "maybe_vex") + (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex") (set_attr "mode" "TI")]) (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext" - [(set (match_operand:SWI48 0 "register_operand" "=r") + [(set (match_operand:SWI48 0 "register_operand" "=r,r") (zero_extend:SWI48 (vec_select:<PEXTR_MODE12:ssescalarmode> - (match_operand:PEXTR_MODE12 1 "register_operand" "x") + (match_operand:PEXTR_MODE12 1 "register_operand" "x,v") (parallel [(match_operand:SI 2 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))] "TARGET_SSE2" - "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}" - [(set_attr "type" "sselog1") + "@ + %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2} + vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "isa" "*,avx512bw") + (set_attr "type" "sselog1") (set_attr 
"prefix_data16" "1") (set (attr "prefix_extra") (if_then_else @@ -12931,9 +13112,9 @@ "#") (define_insn "*vec_extract<ssevecmodelower>_0" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m") + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,v ,m") (vec_select:SWI48 - (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x") + (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,v,vm,v") (parallel [(const_int 0)])))] "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" @@ -12943,7 +13124,7 @@ [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x") + (match_operand:V4SI 1 "register_operand" "v") (parallel [(const_int 0)]))))] "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC" "#" @@ -12952,9 +13133,9 @@ "operands[1] = gen_lowpart (SImode, operands[1]);") (define_insn "*vec_extractv2di_0_sse" - [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m") + [(set (match_operand:DI 0 "nonimmediate_operand" "=v,m") (vec_select:DI - (match_operand:V2DI 1 "nonimmediate_operand" "xm,x") + (match_operand:V2DI 1 "nonimmediate_operand" "vm,v") (parallel [(const_int 0)])))] "TARGET_SSE && !TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" @@ -12970,46 +13151,49 @@ "operands[1] = gen_lowpart (<MODE>mode, operands[1]);") (define_insn "*vec_extractv4si" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x") + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv") (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x,0,0,x") + (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v") (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))] "TARGET_SSE4_1" { switch (which_alternative) { case 0: + case 1: return "%vpextrd\t{%2, %1, %0|%0, %1, %2}"; - case 1: case 2: - operands [2] = GEN_INT (INTVAL (operands[2]) * 4); + case 3: + operands[2] = GEN_INT (INTVAL (operands[2]) * 4); return "psrldq\t{%2, 
%0|%0, %2}"; - case 3: - operands [2] = GEN_INT (INTVAL (operands[2]) * 4); + case 4: + case 5: + operands[2] = GEN_INT (INTVAL (operands[2]) * 4); return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; default: gcc_unreachable (); } } - [(set_attr "isa" "*,noavx,noavx,avx") - (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1") - (set_attr "prefix_extra" "1,*,*,*") + [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw") + (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1") + (set_attr "prefix_extra" "1,1,*,*,*,*") (set_attr "length_immediate" "1") - (set_attr "prefix" "maybe_vex,orig,orig,vex") + (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex") (set_attr "mode" "TI")]) (define_insn "*vec_extractv4si_zext" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (vec_select:SI - (match_operand:V4SI 1 "register_operand" "x") + (match_operand:V4SI 1 "register_operand" "x,v") (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))] "TARGET_64BIT && TARGET_SSE4_1" "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}" - [(set_attr "type" "sselog1") + [(set_attr "isa" "*,avx512dq") + (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") @@ -13038,26 +13222,28 @@ }) (define_insn "*vec_extractv2di_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r") (vec_select:DI - (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o") + (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o") (parallel [(const_int 1)])))] "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ %vpextrq\t{$1, %1, %0|%0, %1, 1} + vpextrq\t{$1, %1, %0|%0, %1, 1} %vmovhps\t{%1, %0|%0, %1} psrldq\t{$8, %0|%0, 8} vpsrldq\t{$8, %1, %0|%0, %1, 8} + vpsrldq\t{$8, %1, %0|%0, %1, 8} movhlps\t{%1, %0|%0, %1} # #" - [(set_attr "isa" 
"x64_sse4,*,sse2_noavx,avx,noavx,*,x64") - (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov") - (set_attr "length_immediate" "1,*,1,1,*,*,*") - (set_attr "prefix_rex" "1,*,*,*,*,*,*") - (set_attr "prefix_extra" "1,*,*,*,*,*,*") - (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*") - (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")]) + [(set_attr "isa" "x64_sse4,x64_avx512dq,*,sse2_noavx,avx,avx512bw,noavx,*,x64") + (set_attr "type" "sselog1,sselog1,ssemov,sseishft1,sseishft1,sseishft1,ssemov,ssemov,imov") + (set_attr "length_immediate" "1,1,*,1,1,1,*,*,*") + (set_attr "prefix_rex" "1,1,*,*,*,*,*,*,*") + (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*") + (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*") + (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")]) (define_split [(set (match_operand:<ssescalarmode> 0 "register_operand") @@ -13765,12 +13951,12 @@ (set_attr "mode" "DI")]) (define_insn "avx2_pmaddubsw256" - [(set (match_operand:V16HI 0 "register_operand" "=x") + [(set (match_operand:V16HI 0 "register_operand" "=x,v") (ss_plus:V16HI (mult:V16HI (zero_extend:V16HI (vec_select:V16QI - (match_operand:V32QI 1 "register_operand" "x") + (match_operand:V32QI 1 "register_operand" "x,v") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -13781,7 +13967,7 @@ (const_int 28) (const_int 30)]))) (sign_extend:V16HI (vec_select:V16QI - (match_operand:V32QI 2 "nonimmediate_operand" "xm") + (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -13813,9 +13999,10 @@ (const_int 29) (const_int 31)]))))))] "TARGET_AVX2" "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sseiadd") + [(set_attr "isa" "*,avx512bw") + (set_attr "type" "sseiadd") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "vex,evex") (set_attr "mode" "OI")]) ;; The correct representation 
for this is absolutely enormous, and @@ -13868,19 +14055,19 @@ (set_attr "mode" "XI")]) (define_insn "ssse3_pmaddubsw128" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x,v") (ss_plus:V8HI (mult:V8HI (zero_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 1 "register_operand" "0,x") + (match_operand:V16QI 1 "register_operand" "0,x,v") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))) (sign_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 2 "vector_operand" "xBm,xm") + (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -13901,13 +14088,14 @@ "TARGET_SSSE3" "@ pmaddubsw\t{%2, %0|%0, %2} + vpmaddubsw\t{%2, %1, %0|%0, %1, %2} vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sseiadd") (set_attr "atom_unit" "simul") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,*,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,vex,evex") (set_attr "mode" "TI")]) (define_insn "ssse3_pmaddubsw" @@ -13988,16 +14176,16 @@ }) (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v") (truncate:VI2_AVX2 (lshiftrt:<ssedoublemode> (plus:<ssedoublemode> (lshiftrt:<ssedoublemode> (mult:<ssedoublemode> (sign_extend:<ssedoublemode> - (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")) + (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v")) (sign_extend:<ssedoublemode> - (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm"))) + (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm"))) (const_int 14)) (match_operand:VI2_AVX2 3 "const1_operand")) (const_int 1))))] @@ -14005,12 +14193,13 @@ && 
ix86_binary_operator_ok (MULT, <MODE>mode, operands)" "@ pmulhrsw\t{%2, %0|%0, %2} + vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2} vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sseimul") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,*,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex") + (set_attr "prefix" "orig,maybe_evex,evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "*ssse3_pmulhrswv4hi3" @@ -14035,21 +14224,22 @@ (set_attr "mode" "DI")]) (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>" - [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") (unspec:VI1_AVX512 - [(match_operand:VI1_AVX512 1 "register_operand" "0,v") - (match_operand:VI1_AVX512 2 "vector_operand" "xBm,vm")] + [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v") + (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")] UNSPEC_PSHUFB))] "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ pshufb\t{%2, %0|%0, %2} + vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sselog1") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,*,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex") - (set_attr "btver2_decode" "vector,vector") + (set_attr "prefix" "orig,maybe_evex,evex") + (set_attr "btver2_decode" "vector") (set_attr "mode" "<sseinsnmode>")]) (define_insn "ssse3_pshufbv8qi3" @@ -14117,11 +14307,11 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "<ssse3_avx2>_palignr<mode>" - [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v") + [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v") 
(unspec:SSESCALARMODE - [(match_operand:SSESCALARMODE 1 "register_operand" "0,v") - (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,vm") - (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] + [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v") + (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")] UNSPEC_PALIGNR))] "TARGET_SSSE3" { @@ -14132,18 +14322,19 @@ case 0: return "palignr\t{%3, %2, %0|%0, %2, %3}"; case 1: + case 2: return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } } - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,avx512bw") (set_attr "type" "sseishft") (set_attr "atom_unit" "sishuf") - (set_attr "prefix_data16" "1,*") + (set_attr "prefix_data16" "1,*,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,vex,evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "ssse3_palignrdi" @@ -14423,21 +14614,22 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse4_1_avx2>_packusdw<mask_name>" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v") (vec_concat:VI2_AVX2 (us_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v")) + (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v")) (us_truncate:<ssehalfvecmode> - (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,vm"))))] + (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))] "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>" "@ packusdw\t{%2, %0|%0, %2} packusdw\t{%2, %0|%0, %2} + vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2} vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,noavx,avx") + [(set_attr "isa" "noavx,noavx,avx,avx512bw") (set_attr "type" "sselog") 
(set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "prefix" "orig,orig,<mask_prefix>,evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "<sse4_1_avx2>_pblendvb" @@ -16551,30 +16743,40 @@ (set_attr "btver2_decode" "vector") (set_attr "mode" "OI")]) +(define_mode_attr pbroadcast_evex_isa + [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw") + (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw") + (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f") + (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")]) + (define_insn "avx2_pbroadcast<mode>" - [(set (match_operand:VI 0 "register_operand" "=x") + [(set (match_operand:VI 0 "register_operand" "=x,v") (vec_duplicate:VI (vec_select:<ssescalarmode> - (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm") + (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm") (parallel [(const_int 0)]))))] "TARGET_AVX2" "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}" - [(set_attr "type" "ssemov") + [(set_attr "isa" "*,<pbroadcast_evex_isa>") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "vex,evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "avx2_pbroadcast<mode>_1" - [(set (match_operand:VI_256 0 "register_operand" "=x,x") + [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v") (vec_duplicate:VI_256 (vec_select:<ssescalarmode> - (match_operand:VI_256 1 "nonimmediate_operand" "m,x") + (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v") (parallel [(const_int 0)]))))] "TARGET_AVX2" "@ vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1} + vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1} + vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1} vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}" - [(set_attr "type" "ssemov") + [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "vex") (set_attr "mode" "<sseinsnmode>")]) @@ 
-16684,15 +16886,15 @@ (set_attr "mode" "OI")]) (define_insn "avx2_vec_dupv4df" - [(set (match_operand:V4DF 0 "register_operand" "=x") + [(set (match_operand:V4DF 0 "register_operand" "=v") (vec_duplicate:V4DF (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") + (match_operand:V2DF 1 "register_operand" "v") (parallel [(const_int 0)]))))] "TARGET_AVX2" "vbroadcastsd\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") - (set_attr "prefix" "vex") + (set_attr "prefix" "maybe_evex") (set_attr "mode" "V4DF")]) (define_insn "<avx512>_vec_dup<mode>_1" @@ -16795,9 +16997,9 @@ (const_int 1)))]) (define_insn "vec_dupv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") + [(set (match_operand:V4SF 0 "register_operand" "=v,v,x") (vec_duplicate:V4SF - (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))] + (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))] "TARGET_SSE" "@ vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} @@ -16807,13 +17009,13 @@ (set_attr "type" "sseshuf1,ssemov,sseshuf1") (set_attr "length_immediate" "1,0,1") (set_attr "prefix_extra" "0,1,*") - (set_attr "prefix" "vex,vex,orig") + (set_attr "prefix" "maybe_evex,maybe_evex,orig") (set_attr "mode" "V4SF")]) (define_insn "*vec_dupv4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + [(set (match_operand:V4SI 0 "register_operand" "=v,v,x") (vec_duplicate:V4SI - (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))] + (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))] "TARGET_SSE" "@ %vpshufd\t{$0, %1, %0|%0, %1, 0} @@ -16823,13 +17025,13 @@ (set_attr "type" "sselog1,ssemov,sselog1") (set_attr "length_immediate" "1,0,1") (set_attr "prefix_extra" "0,1,*") - (set_attr "prefix" "maybe_vex,vex,orig") + (set_attr "prefix" "maybe_vex,maybe_evex,orig") (set_attr "mode" "TI,V4SF,V4SF")]) (define_insn "*vec_dupv2di" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x") + [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x") (vec_duplicate:V2DI - (match_operand:DI 1 
"nonimmediate_operand" " 0,x,m,0")))] + (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,m,0")))] "TARGET_SSE" "@ punpcklqdq\t%0, %0 @@ -16838,19 +17040,23 @@ movlhps\t%0, %0" [(set_attr "isa" "sse2_noavx,avx,sse3,noavx") (set_attr "type" "sselog1,sselog1,sselog1,ssemov") - (set_attr "prefix" "orig,vex,maybe_vex,orig") + (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig") (set_attr "mode" "TI,TI,DF,V4SF")]) (define_insn "avx2_vbroadcasti128_<mode>" - [(set (match_operand:VI_256 0 "register_operand" "=x") + [(set (match_operand:VI_256 0 "register_operand" "=x,v,v") (vec_concat:VI_256 - (match_operand:<ssehalfvecmode> 1 "memory_operand" "m") + (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m") (match_dup 1)))] "TARGET_AVX2" - "vbroadcasti128\t{%1, %0|%0, %1}" - [(set_attr "type" "ssemov") + "@ + vbroadcasti128\t{%1, %0|%0, %1} + vbroadcast<i128vldq>\t{%1, %0|%0, %1} + vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}" + [(set_attr "isa" "*,avx512dq,avx512vl") + (set_attr "type" "ssemov") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "vex,evex,evex") (set_attr "mode" "OI")]) ;; Modes handled by AVX vec_dup patterns. 
@@ -16927,19 +17133,24 @@ "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);") (define_insn "avx_vbroadcastf128_<mode>" - [(set (match_operand:V_256 0 "register_operand" "=x,x,x") + [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v") (vec_concat:V_256 - (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x") + (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0") (match_dup 1)))] "TARGET_AVX" "@ vbroadcast<i128>\t{%1, %0|%0, %1} vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1} - vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}" - [(set_attr "type" "ssemov,sselog1,sselog1") + vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0} + vbroadcast<i128vldq>\t{%1, %0|%0, %1} + vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1} + vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1} + vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}" + [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl") + (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1") (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "0,1,1") - (set_attr "prefix" "vex") + (set_attr "length_immediate" "0,1,1,0,1,0,1") + (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex") (set_attr "mode" "<sseinsnmode>")]) ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si. 
@@ -16961,7 +17172,7 @@ (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm") (parallel [(const_int 0) (const_int 1)]))))] "TARGET_AVX512DQ" - "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" + "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}" [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "evex") @@ -17184,11 +17395,11 @@ (set_attr "mode" "<sseinsnmode>")]) (define_expand "<avx512>_vpermi2var<mode>3_maskz" - [(match_operand:VI48F 0 "register_operand" "=v") - (match_operand:VI48F 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm") - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] + [(match_operand:VI48F 0 "register_operand") + (match_operand:VI48F 1 "register_operand") + (match_operand:<sseintvecmode> 2 "register_operand") + (match_operand:VI48F 3 "nonimmediate_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512F" { emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( @@ -17212,11 +17423,11 @@ }) (define_expand "<avx512>_vpermi2var<mode>3_maskz" - [(match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (match_operand:VI2_AVX512VL 1 "register_operand" "v") - (match_operand:<sseintvecmode> 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm") - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] + [(match_operand:VI2_AVX512VL 0 "register_operand") + (match_operand:VI2_AVX512VL 1 "register_operand") + (match_operand:<sseintvecmode> 2 "register_operand") + (match_operand:VI2_AVX512VL 3 "nonimmediate_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512BW" { emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 ( @@ -17313,11 +17524,11 @@ (set_attr "mode" "<sseinsnmode>")]) (define_expand "<avx512>_vpermt2var<mode>3_maskz" - [(match_operand:VI48F 0 "register_operand" "=v") - 
(match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI48F 2 "register_operand" "0") - (match_operand:VI48F 3 "nonimmediate_operand" "vm") - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] + [(match_operand:VI48F 0 "register_operand") + (match_operand:<sseintvecmode> 1 "register_operand") + (match_operand:VI48F 2 "register_operand") + (match_operand:VI48F 3 "nonimmediate_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512F" { emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( @@ -17327,11 +17538,11 @@ }) (define_expand "<avx512>_vpermt2var<mode>3_maskz" - [(match_operand:VI1_AVX512VL 0 "register_operand" "=v") - (match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI1_AVX512VL 2 "register_operand" "0") - (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm") - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] + [(match_operand:VI1_AVX512VL 0 "register_operand") + (match_operand:<sseintvecmode> 1 "register_operand") + (match_operand:VI1_AVX512VL 2 "register_operand") + (match_operand:VI1_AVX512VL 3 "nonimmediate_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512VBMI" { emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( @@ -17341,11 +17552,11 @@ }) (define_expand "<avx512>_vpermt2var<mode>3_maskz" - [(match_operand:VI2_AVX512VL 0 "register_operand" "=v") - (match_operand:<sseintvecmode> 1 "register_operand" "v") - (match_operand:VI2_AVX512VL 2 "register_operand" "0") - (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm") - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")] + [(match_operand:VI2_AVX512VL 0 "register_operand") + (match_operand:<sseintvecmode> 1 "register_operand") + (match_operand:VI2_AVX512VL 2 "register_operand") + (match_operand:VI2_AVX512VL 3 "nonimmediate_operand") + (match_operand:<avx512fmaskmode> 4 "register_operand")] "TARGET_AVX512BW" { emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 ( @@ 
-17607,10 +17818,12 @@ (vec_select:<ssehalfvecmode> (match_operand:VI8F_256 1 "register_operand" "v") (parallel [(const_int 2) (const_int 3)]))))] - "TARGET_AVX" + "TARGET_AVX && <mask_avx512dq_condition>" { - if (TARGET_AVX512VL) + if (TARGET_AVX512DQ) return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; + else if (TARGET_AVX512VL) + return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"; else return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; } @@ -17627,10 +17840,12 @@ (match_operand:VI8F_256 1 "register_operand" "v") (parallel [(const_int 0) (const_int 1)])) (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))] - "TARGET_AVX" + "TARGET_AVX && <mask_avx512dq_condition>" { - if (TARGET_AVX512VL) + if (TARGET_AVX512DQ) return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; + else if (TARGET_AVX512VL) + return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"; else return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; } @@ -17683,47 +17898,51 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "vec_set_lo_v16hi" - [(set (match_operand:V16HI 0 "register_operand" "=x") + [(set (match_operand:V16HI 0 "register_operand" "=x,v") (vec_concat:V16HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm") + (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm") (vec_select:V8HI - (match_operand:V16HI 1 "register_operand" "x") + (match_operand:V16HI 1 "register_operand" "x,v") (parallel [(const_int 8) (const_int 9) (const_int 10) (const_int 11) (const_int 12) (const_int 13) (const_int 14) (const_int 15)]))))] "TARGET_AVX" - "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" + "@ + vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0} + vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - 
(set_attr "prefix" "vex") + (set_attr "prefix" "vex,evex") (set_attr "mode" "OI")]) (define_insn "vec_set_hi_v16hi" - [(set (match_operand:V16HI 0 "register_operand" "=x") + [(set (match_operand:V16HI 0 "register_operand" "=x,v") (vec_concat:V16HI (vec_select:V8HI - (match_operand:V16HI 1 "register_operand" "x") + (match_operand:V16HI 1 "register_operand" "x,v") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)])) - (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] + (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))] "TARGET_AVX" - "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" + "@ + vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1} + vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "vex,evex") (set_attr "mode" "OI")]) (define_insn "vec_set_lo_v32qi" - [(set (match_operand:V32QI 0 "register_operand" "=x") + [(set (match_operand:V32QI 0 "register_operand" "=x,v") (vec_concat:V32QI - (match_operand:V16QI 2 "nonimmediate_operand" "xm") + (match_operand:V16QI 2 "nonimmediate_operand" "xm,v") (vec_select:V16QI - (match_operand:V32QI 1 "register_operand" "x") + (match_operand:V32QI 1 "register_operand" "x,v") (parallel [(const_int 16) (const_int 17) (const_int 18) (const_int 19) (const_int 20) (const_int 21) @@ -17733,18 +17952,20 @@ (const_int 28) (const_int 29) (const_int 30) (const_int 31)]))))] "TARGET_AVX" - "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" + "@ + vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0} + vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "vex,evex") (set_attr "mode" "OI")]) (define_insn "vec_set_hi_v32qi" - [(set (match_operand:V32QI 0 "register_operand" "=x") + [(set 
(match_operand:V32QI 0 "register_operand" "=x,v") (vec_concat:V32QI (vec_select:V16QI - (match_operand:V32QI 1 "register_operand" "x") + (match_operand:V32QI 1 "register_operand" "x,v") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3) (const_int 4) (const_int 5) @@ -17753,13 +17974,15 @@ (const_int 10) (const_int 11) (const_int 12) (const_int 13) (const_int 14) (const_int 15)])) - (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] + (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))] "TARGET_AVX" - "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" + "@ + vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1} + vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") + (set_attr "prefix" "vex,evex") (set_attr "mode" "OI")]) (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>" @@ -18375,7 +18598,11 @@ UNSPEC_GATHER)) (clobber (match_scratch:QI 2 "=&Yk"))] "TARGET_AVX512F" - "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}" +{ + if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4) + return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %t6}"; + return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"; +} [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -18475,7 +18702,11 @@ UNSPEC_SCATTER)) (clobber (match_scratch:QI 1 "=&Yk"))] "TARGET_AVX512F" - "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}" +{ + if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8) + return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"; + return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}"; +} [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) |