aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2020-03-05 16:45:05 -0800
committerH.J. Lu <hjl.tools@gmail.com>2020-03-05 16:45:45 -0800
commit5358e8f5800daa0012fc9d06705d64bbb21fa07b (patch)
treea097ef8a4c634da8fd0c5e70659d7b46e07d2ee0
parent34ec7d5347e3ddb15782ea9a2e3fecc280723521 (diff)
i386: Properly encode vector registers in vector move
On x86, when AVX and AVX512 are enabled, vector move instructions can be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512): 0: c5 f9 6f d1 vmovdqa %xmm1,%xmm2 4: 62 f1 fd 08 6f d1 vmovdqa64 %xmm1,%xmm2 We prefer VEX encoding over EVEX since VEX is shorter. Also AVX512F only supports 512-bit vector moves. AVX512F + AVX512VL supports 128-bit and 256-bit vector moves. xmm16-xmm31 and ymm16-ymm31 are disallowed in 128-bit and 256-bit modes when AVX512VL is disabled. Mode attributes on x86 vector move patterns indicate target preferences of vector move encoding. For scalar register to register move, we can use 512-bit vector move instructions to move 32-bit/64-bit scalar if AVX512VL isn't available. With AVX512F and AVX512VL, we should use VEX encoding for 128-bit/256-bit vector moves if upper 16 vector registers aren't used. This patch adds a function, ix86_output_ssemov, to generate vector moves: 1. If zmm registers are used, use EVEX encoding. 2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding will be generated. 3. If xmm16-xmm31/ymm16-ymm31 registers are used: a. With AVX512VL, AVX512VL vector moves will be generated. b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register move will be done with zmm register move. There is no need to set mode attribute to XImode explicitly since ix86_output_ssemov can properly encode xmm16-xmm31/ymm16-ymm31 registers with and without AVX512VL. Tested on AVX2 and AVX512 with and without --with-arch=native. gcc/ PR target/89229 PR target/89346 * config/i386/i386-protos.h (ix86_output_ssemov): New prototype. * config/i386/i386.c (ix86_get_ssemov): New function. (ix86_output_ssemov): Likewise. * config/i386/sse.md (VMOVE:mov<mode>_internal): Call ix86_output_ssemov for TYPE_SSEMOV. Remove TARGET_AVX512VL check. (*movxi_internal_avx512f): Call ix86_output_ssemov for TYPE_SSEMOV. (*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV. Remove ext_sse_reg_operand and TARGET_AVX512VL check. (*movti_internal): Likewise. (*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV. gcc/testsuite/ PR target/89229 PR target/89346 * gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated. * gcc.target/i386/pr89229-2a.c: New test. * gcc.target/i386/pr89229-2b.c: Likewise. * gcc.target/i386/pr89229-2c.c: Likewise. * gcc.target/i386/pr89229-3a.c: Likewise. * gcc.target/i386/pr89229-3b.c: Likewise. * gcc.target/i386/pr89229-3c.c: Likewise. * gcc.target/i386/pr89346.c: Likewise.
-rw-r--r--gcc/ChangeLog16
-rw-r--r--gcc/config/i386/i386-protos.h2
-rw-r--r--gcc/config/i386/i386.c208
-rw-r--r--gcc/config/i386/i386.md86
-rw-r--r--gcc/config/i386/sse.md98
-rw-r--r--gcc/testsuite/ChangeLog13
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c7
-rw-r--r--gcc/testsuite/gcc.target/i386/pr89229-2a.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/pr89229-2b.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/pr89229-2c.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr89229-3a.c16
-rw-r--r--gcc/testsuite/gcc.target/i386/pr89229-3b.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/pr89229-3c.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr89346.c15
14 files changed, 332 insertions, 181 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 14e90fbd8b7..78a118903a7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2020-03-05 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/89229
+ PR target/89346
+ * config/i386/i386-protos.h (ix86_output_ssemov): New prototype.
+ * config/i386/i386.c (ix86_get_ssemov): New function.
+ (ix86_output_ssemov): Likewise.
+ * config/i386/sse.md (VMOVE:mov<mode>_internal): Call
+ ix86_output_ssemov for TYPE_SSEMOV. Remove TARGET_AVX512VL
+ check.
+ (*movxi_internal_avx512f): Call ix86_output_ssemov for TYPE_SSEMOV.
+ (*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV.
+ Remove ext_sse_reg_operand and TARGET_AVX512VL check.
+ (*movti_internal): Likewise.
+ (*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV.
+
2020-03-05 Jeff Law <law@redhat.com>
PR tree-optimization/91890
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 266381ca5a6..39fcaa0ad5f 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -38,6 +38,8 @@ extern void ix86_expand_split_stack_prologue (void);
extern void ix86_output_addr_vec_elt (FILE *, int);
extern void ix86_output_addr_diff_elt (FILE *, int, int);
+extern const char *ix86_output_ssemov (rtx_insn *, rtx *);
+
extern enum calling_abi ix86_cfun_abi (void);
extern enum calling_abi ix86_function_type_abi (const_tree);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index dac7a3fc5fd..7bbfbb4c5a7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -4915,6 +4915,214 @@ ix86_pre_reload_split (void)
&& !(cfun->curr_properties & PROP_rtl_split_insns));
}
+/* Return the opcode of the TYPE_SSEMOV instruction. To move from
+ or to xmm16-xmm31/ymm16-ymm31 registers, we either require
+ TARGET_AVX512VL or it is a register to register move which can
+ be done with zmm register move. */
+
+static const char *
+ix86_get_ssemov (rtx *operands, unsigned size,
+ enum attr_mode insn_mode, machine_mode mode)
+{
+ char buf[128];
+ bool misaligned_p = (misaligned_operand (operands[0], mode)
+ || misaligned_operand (operands[1], mode));
+ bool evex_reg_p = (size == 64
+ || EXT_REX_SSE_REG_P (operands[0])
+ || EXT_REX_SSE_REG_P (operands[1]));
+ machine_mode scalar_mode;
+
+ const char *opcode = NULL;
+ enum
+ {
+ opcode_int,
+ opcode_float,
+ opcode_double
+ } type = opcode_int;
+
+ switch (insn_mode)
+ {
+ case MODE_V16SF:
+ case MODE_V8SF:
+ case MODE_V4SF:
+ scalar_mode = E_SFmode;
+ type = opcode_float;
+ break;
+ case MODE_V8DF:
+ case MODE_V4DF:
+ case MODE_V2DF:
+ scalar_mode = E_DFmode;
+ type = opcode_double;
+ break;
+ case MODE_XI:
+ case MODE_OI:
+ case MODE_TI:
+ scalar_mode = GET_MODE_INNER (mode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
+ we can only use zmm register move without memory operand. */
+ if (evex_reg_p
+ && !TARGET_AVX512VL
+ && GET_MODE_SIZE (mode) < 64)
+ {
+ /* NB: Since ix86_hard_regno_mode_ok only allows xmm16-xmm31 or
+ ymm16-ymm31 in 128/256 bit modes when AVX512VL is enabled,
+ we get here only for xmm16-xmm31 or ymm16-ymm31 in 32/64 bit
+ modes. */
+ if (GET_MODE_SIZE (mode) >= 16
+ || memory_operand (operands[0], mode)
+ || memory_operand (operands[1], mode))
+ gcc_unreachable ();
+ size = 64;
+ switch (type)
+ {
+ case opcode_int:
+ opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+ break;
+ case opcode_float:
+ opcode = misaligned_p ? "vmovups" : "vmovaps";
+ break;
+ case opcode_double:
+ opcode = misaligned_p ? "vmovupd" : "vmovapd";
+ break;
+ }
+ }
+ else if (SCALAR_FLOAT_MODE_P (scalar_mode))
+ {
+ switch (scalar_mode)
+ {
+ case E_SFmode:
+ opcode = misaligned_p ? "%vmovups" : "%vmovaps";
+ break;
+ case E_DFmode:
+ opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
+ break;
+ case E_TFmode:
+ if (evex_reg_p)
+ opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+ else
+ opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else if (SCALAR_INT_MODE_P (scalar_mode))
+ {
+ switch (scalar_mode)
+ {
+ case E_QImode:
+ if (evex_reg_p)
+ opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+ ? "vmovdqu8"
+ : "vmovdqu64")
+ : "vmovdqa64");
+ else
+ opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+ ? "vmovdqu8"
+ : "%vmovdqu")
+ : "%vmovdqa");
+ break;
+ case E_HImode:
+ if (evex_reg_p)
+ opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+ ? "vmovdqu16"
+ : "vmovdqu64")
+ : "vmovdqa64");
+ else
+ opcode = (misaligned_p
+ ? (TARGET_AVX512BW
+ ? "vmovdqu16"
+ : "%vmovdqu")
+ : "%vmovdqa");
+ break;
+ case E_SImode:
+ if (evex_reg_p)
+ opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+ else
+ opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+ break;
+ case E_DImode:
+ case E_TImode:
+ case E_OImode:
+ if (evex_reg_p)
+ opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+ else
+ opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+ break;
+ case E_XImode:
+ opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ gcc_unreachable ();
+
+ switch (size)
+ {
+ case 64:
+ snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
+ opcode);
+ break;
+ case 32:
+ snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
+ opcode);
+ break;
+ case 16:
+ snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
+ opcode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ output_asm_insn (buf, operands);
+ return "";
+}
+
+/* Return the template of the TYPE_SSEMOV instruction to move
+ operands[1] into operands[0]. */
+
+const char *
+ix86_output_ssemov (rtx_insn *insn, rtx *operands)
+{
+ machine_mode mode = GET_MODE (operands[0]);
+ if (get_attr_type (insn) != TYPE_SSEMOV
+ || mode != GET_MODE (operands[1]))
+ gcc_unreachable ();
+
+ enum attr_mode insn_mode = get_attr_mode (insn);
+
+ switch (insn_mode)
+ {
+ case MODE_XI:
+ case MODE_V8DF:
+ case MODE_V16SF:
+ return ix86_get_ssemov (operands, 64, insn_mode, mode);
+
+ case MODE_OI:
+ case MODE_V4DF:
+ case MODE_V8SF:
+ return ix86_get_ssemov (operands, 32, insn_mode, mode);
+
+ case MODE_TI:
+ case MODE_V2DF:
+ case MODE_V4SF:
+ return ix86_get_ssemov (operands, 16, insn_mode, mode);
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Returns true if OP contains a symbol reference */
bool
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8e29dffafa6..a4ee549567d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1902,11 +1902,7 @@
return standard_sse_constant_opcode (insn, operands);
case TYPE_SSEMOV:
- if (misaligned_operand (operands[0], XImode)
- || misaligned_operand (operands[1], XImode))
- return "vmovdqu32\t{%1, %0|%0, %1}";
- else
- return "vmovdqa32\t{%1, %0|%0, %1}";
+ return ix86_output_ssemov (insn, operands);
default:
gcc_unreachable ();
@@ -1929,21 +1925,7 @@
return standard_sse_constant_opcode (insn, operands);
case TYPE_SSEMOV:
- if (misaligned_operand (operands[0], OImode)
- || misaligned_operand (operands[1], OImode))
- {
- if (get_attr_mode (insn) == MODE_XI)
- return "vmovdqu32\t{%1, %0|%0, %1}";
- else
- return "vmovdqu\t{%1, %0|%0, %1}";
- }
- else
- {
- if (get_attr_mode (insn) == MODE_XI)
- return "vmovdqa32\t{%1, %0|%0, %1}";
- else
- return "vmovdqa\t{%1, %0|%0, %1}";
- }
+ return ix86_output_ssemov (insn, operands);
default:
gcc_unreachable ();
@@ -1952,15 +1934,7 @@
[(set_attr "isa" "*,avx2,*,*")
(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
(set_attr "prefix" "vex")
- (set (attr "mode")
- (cond [(ior (match_operand 0 "ext_sse_reg_operand")
- (match_operand 1 "ext_sse_reg_operand"))
- (const_string "XI")
- (and (eq_attr "alternative" "1")
- (match_test "TARGET_AVX512VL"))
- (const_string "XI")
- ]
- (const_string "OI")))])
+ (set_attr "mode" "OI")])
(define_insn "*movti_internal"
[(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd")
@@ -1981,27 +1955,7 @@
return standard_sse_constant_opcode (insn, operands);
case TYPE_SSEMOV:
- /* TDmode values are passed as TImode on the stack. Moving them
- to stack may result in unaligned memory access. */
- if (misaligned_operand (operands[0], TImode)
- || misaligned_operand (operands[1], TImode))
- {
- if (get_attr_mode (insn) == MODE_V4SF)
- return "%vmovups\t{%1, %0|%0, %1}";
- else if (get_attr_mode (insn) == MODE_XI)
- return "vmovdqu32\t{%1, %0|%0, %1}";
- else
- return "%vmovdqu\t{%1, %0|%0, %1}";
- }
- else
- {
- if (get_attr_mode (insn) == MODE_V4SF)
- return "%vmovaps\t{%1, %0|%0, %1}";
- else if (get_attr_mode (insn) == MODE_XI)
- return "vmovdqa32\t{%1, %0|%0, %1}";
- else
- return "%vmovdqa\t{%1, %0|%0, %1}";
- }
+ return ix86_output_ssemov (insn, operands);
default:
gcc_unreachable ();
@@ -2028,12 +1982,6 @@
(set (attr "mode")
(cond [(eq_attr "alternative" "0,1")
(const_string "DI")
- (ior (match_operand 0 "ext_sse_reg_operand")
- (match_operand 1 "ext_sse_reg_operand"))
- (const_string "XI")
- (and (eq_attr "alternative" "3")
- (match_test "TARGET_AVX512VL"))
- (const_string "XI")
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
@@ -3254,31 +3202,7 @@
return standard_sse_constant_opcode (insn, operands);
case TYPE_SSEMOV:
- /* Handle misaligned load/store since we
- don't have movmisaligntf pattern. */
- if (misaligned_operand (operands[0], TFmode)
- || misaligned_operand (operands[1], TFmode))
- {
- if (get_attr_mode (insn) == MODE_V4SF)
- return "%vmovups\t{%1, %0|%0, %1}";
- else if (TARGET_AVX512VL
- && (EXT_REX_SSE_REG_P (operands[0])
- || EXT_REX_SSE_REG_P (operands[1])))
- return "vmovdqu64\t{%1, %0|%0, %1}";
- else
- return "%vmovdqu\t{%1, %0|%0, %1}";
- }
- else
- {
- if (get_attr_mode (insn) == MODE_V4SF)
- return "%vmovaps\t{%1, %0|%0, %1}";
- else if (TARGET_AVX512VL
- && (EXT_REX_SSE_REG_P (operands[0])
- || EXT_REX_SSE_REG_P (operands[1])))
- return "vmovdqa64\t{%1, %0|%0, %1}";
- else
- return "%vmovdqa\t{%1, %0|%0, %1}";
- }
+ return ix86_output_ssemov (insn, operands);
case TYPE_MULTI:
return "#";
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ee1f138d1af..8f5902292c6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1013,98 +1013,7 @@
return standard_sse_constant_opcode (insn, operands);
case TYPE_SSEMOV:
- /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
- in avx512f, so we need to use workarounds, to access sse registers
- 16-31, which are evex-only. In avx512vl we don't need workarounds. */
- if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
- && (EXT_REX_SSE_REG_P (operands[0])
- || EXT_REX_SSE_REG_P (operands[1])))
- {
- if (memory_operand (operands[0], <MODE>mode))
- {
- if (<MODE_SIZE> == 32)
- return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
- else if (<MODE_SIZE> == 16)
- return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
- else
- gcc_unreachable ();
- }
- else if (memory_operand (operands[1], <MODE>mode))
- {
- if (<MODE_SIZE> == 32)
- return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
- else if (<MODE_SIZE> == 16)
- return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
- else
- gcc_unreachable ();
- }
- else
- /* Reg -> reg move is always aligned. Just use wider move. */
- switch (get_attr_mode (insn))
- {
- case MODE_V8SF:
- case MODE_V4SF:
- return "vmovaps\t{%g1, %g0|%g0, %g1}";
- case MODE_V4DF:
- case MODE_V2DF:
- return "vmovapd\t{%g1, %g0|%g0, %g1}";
- case MODE_OI:
- case MODE_TI:
- return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
- default:
- gcc_unreachable ();
- }
- }
-
- switch (get_attr_mode (insn))
- {
- case MODE_V16SF:
- case MODE_V8SF:
- case MODE_V4SF:
- if (misaligned_operand (operands[0], <MODE>mode)
- || misaligned_operand (operands[1], <MODE>mode))
- return "%vmovups\t{%1, %0|%0, %1}";
- else
- return "%vmovaps\t{%1, %0|%0, %1}";
-
- case MODE_V8DF:
- case MODE_V4DF:
- case MODE_V2DF:
- if (misaligned_operand (operands[0], <MODE>mode)
- || misaligned_operand (operands[1], <MODE>mode))
- return "%vmovupd\t{%1, %0|%0, %1}";
- else
- return "%vmovapd\t{%1, %0|%0, %1}";
-
- case MODE_OI:
- case MODE_TI:
- if (misaligned_operand (operands[0], <MODE>mode)
- || misaligned_operand (operands[1], <MODE>mode))
- return TARGET_AVX512VL
- && (<MODE>mode == V4SImode
- || <MODE>mode == V2DImode
- || <MODE>mode == V8SImode
- || <MODE>mode == V4DImode
- || TARGET_AVX512BW)
- ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
- : "%vmovdqu\t{%1, %0|%0, %1}";
- else
- return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
- : "%vmovdqa\t{%1, %0|%0, %1}";
- case MODE_XI:
- if (misaligned_operand (operands[0], <MODE>mode)
- || misaligned_operand (operands[1], <MODE>mode))
- return (<MODE>mode == V16SImode
- || <MODE>mode == V8DImode
- || TARGET_AVX512BW)
- ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
- : "vmovdqu64\t{%1, %0|%0, %1}";
- else
- return "vmovdqa64\t{%1, %0|%0, %1}";
-
- default:
- gcc_unreachable ();
- }
+ return ix86_output_ssemov (insn, operands);
default:
gcc_unreachable ();
@@ -1113,10 +1022,7 @@
[(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
- (cond [(and (eq_attr "alternative" "1")
- (match_test "TARGET_AVX512VL"))
- (const_string "<sseinsnmode>")
- (match_test "TARGET_AVX")
+ (cond [(match_test "TARGET_AVX")
(const_string "<sseinsnmode>")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c4f161abf3e..6b4f301bb39 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,16 @@
+2020-03-05 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/89229
+ PR target/89346
+ * gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated.
+ * gcc.target/i386/pr89229-2a.c: New test.
+ * gcc.target/i386/pr89229-2b.c: Likewise.
+ * gcc.target/i386/pr89229-2c.c: Likewise.
+ * gcc.target/i386/pr89229-3a.c: Likewise.
+ * gcc.target/i386/pr89229-3b.c: Likewise.
+ * gcc.target/i386/pr89229-3c.c: Likewise.
+ * gcc.target/i386/pr89346.c: Likewise.
+
2020-03-05 Andre Vieira <andre.simoesdiasvieira@arm.com>
* g++.dg/pr80481.C: Disable epilogue vectorization.
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
index 14fe4b84544..db4d9d14875 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vmovdqa64-1.c
@@ -4,14 +4,13 @@
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\nxy\]*\\(.{5,6}(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\nxy\]*\\((?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\nxy\]*\\(.{5,6}(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2a.c b/gcc/testsuite/gcc.target/i386/pr89229-2a.c
new file mode 100644
index 00000000000..0cf78039481
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-2a.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+typedef __int128 __m128t __attribute__ ((__vector_size__ (16),
+ __may_alias__));
+
+__m128t
+foo1 (void)
+{
+ register __int128 xmm16 __asm ("xmm16") = (__int128) -1;
+ asm volatile ("" : "+v" (xmm16));
+ return (__m128t) xmm16;
+}
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2b.c b/gcc/testsuite/gcc.target/i386/pr89229-2b.c
new file mode 100644
index 00000000000..8d5d6c41d30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-2b.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+typedef __int128 __m128t __attribute__ ((__vector_size__ (16),
+ __may_alias__));
+
+__m128t
+foo1 (void)
+{
+ register __int128 xmm16 __asm ("xmm16") = (__int128) -1; /* { dg-error "register specified for 'xmm16'" } */
+ asm volatile ("" : "+v" (xmm16));
+ return (__m128t) xmm16;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2c.c b/gcc/testsuite/gcc.target/i386/pr89229-2c.c
new file mode 100644
index 00000000000..218da46dcd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-2c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-2a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3a.c b/gcc/testsuite/gcc.target/i386/pr89229-3a.c
new file mode 100644
index 00000000000..fcb85c366b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-3a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern __float128 d;
+
+void
+foo1 (__float128 x)
+{
+ register __float128 xmm16 __asm ("xmm16") = x;
+ asm volatile ("" : "+v" (xmm16));
+ register __float128 xmm17 __asm ("xmm17") = xmm16;
+ asm volatile ("" : "+v" (xmm17));
+ d = xmm17;
+}
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3b.c b/gcc/testsuite/gcc.target/i386/pr89229-3b.c
new file mode 100644
index 00000000000..37eb83c783b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-3b.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+extern __float128 d;
+
+void
+foo1 (__float128 x)
+{
+ register __float128 xmm16 __asm ("xmm16") = x; /* { dg-error "register specified for 'xmm16'" } */
+ asm volatile ("" : "+v" (xmm16));
+ d = xmm16;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3c.c b/gcc/testsuite/gcc.target/i386/pr89229-3c.c
new file mode 100644
index 00000000000..529a520133c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-3c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-5a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89346.c b/gcc/testsuite/gcc.target/i386/pr89346.c
new file mode 100644
index 00000000000..cdc9accf521
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89346.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+#include <immintrin.h>
+
+long long *p;
+volatile __m256i y;
+
+void
+foo (void)
+{
+ _mm256_store_epi64 (p, y);
+}
+
+/* { dg-final { scan-assembler-not "vmovdqa64" } } */