PR target/56766

* config/i386/sse.md (*avx_addsubv4df3_1): New insn pattern. (*avx_addsubv4df3_1s): Ditto. (*sse3_addsubv2df3_1): Ditto. (*sse3_addsubv2df3_1s): Ditto. (*avx_addsubv8sf3_1): Ditto. (*avx_addsubv8sf3_1s): Ditto. (*sse3_addsubv4sf3_1): Ditto. (*sse3_addsubv4sf3_1s): Ditto. testsuite/ChangeLog: PR target/56766 * gcc.target/i386/pr56766-1.c: New test. * gcc.target/i386/pr56766-2.c: Ditto. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@224527 138bc75d-0d04-0410-961f-82ee72b054a4
author: uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> 2015-06-16 17:14:00 +0000
committer: uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4> 2015-06-16 17:14:00 +0000
commit: 6c197bf1ddfaea3872fe73d9007d7cc69f9d4cb4 (patch)
tree: ef1d3ef8ba885765f9a5ef18659215ef258098b7
parent: 510e444f7bb40f9dca0f77235da6d06b568976d9 (diff)
3 files changed, 228 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4ef51d66803..d1277ca8a9d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2032,6 +2032,38 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4DF")])
 
+(define_insn "*avx_addsubv4df3_1" ; matches vec_select over (vec_concat difference sum) and emits vaddsubpd
+  [(set (match_operand:V4DF 0 "register_operand" "=x")
+  	(vec_select:V4DF
+	  (vec_concat:V8DF
+	    (minus:V4DF ; first half of the concat (selector indices 0-3): op1 - op2
+	      (match_operand:V4DF 1 "register_operand" "x")
+	      (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+	    (plus:V4DF (match_dup 1) (match_dup 2))) ; second half (selector indices 4-7): op1 + op2
+	  (parallel [(const_int 0) (const_int 5)
+		     (const_int 2) (const_int 7)])))] ; result lanes 0,2 come from the difference, lanes 1,3 from the sum
+  "TARGET_AVX"
+  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V4DF")])
+
+(define_insn "*avx_addsubv4df3_1s" ; matches vec_select over (vec_concat difference sum) with the sum written as op2 + op1
+  [(set (match_operand:V4DF 0 "register_operand" "=x")
+  	(vec_select:V4DF
+	  (vec_concat:V8DF
+	    (minus:V4DF ; first half of the concat (selector indices 0-3): op1 - op2
+	      (match_operand:V4DF 1 "register_operand" "x")
+	      (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+	    (plus:V4DF (match_dup 2) (match_dup 1))) ; second half (selector indices 4-7): op2 + op1, operands in swapped order
+	  (parallel [(const_int 0) (const_int 5)
+		     (const_int 2) (const_int 7)])))] ; result lanes 0,2 come from the difference, lanes 1,3 from the sum
+  "TARGET_AVX"
+  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V4DF")])
+
 (define_insn "sse3_addsubv2df3"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x")
 	(vec_merge:V2DF
@@ -2050,6 +2082,44 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "V2DF")])
 
+(define_insn "*sse3_addsubv2df3_1" ; matches vec_select over (vec_concat difference sum) and emits addsubpd / vaddsubpd
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+	(vec_select:V2DF
+	  (vec_concat:V4DF
+	    (minus:V2DF ; first half of the concat (selector indices 0-1): op1 - op2
+	      (match_operand:V2DF 1 "register_operand" "0,x") ; alternative 0 ties op1 to the destination; alternative 1 allows a separate register
+	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
+	    (plus:V2DF (match_dup 1) (match_dup 2))) ; second half (selector indices 2-3): op1 + op2
+	  (parallel [(const_int 0) (const_int 3)])))] ; result lane 0 comes from the difference, lane 1 from the sum
+  "TARGET_SSE3"
+  "@
+   addsubpd\t{%2, %0|%0, %2}
+   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx") ; one entry per alternative: first template is the non-AVX form, second the AVX form
+   (set_attr "type" "sseadd")
+   (set_attr "atom_unit" "complex")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V2DF")])
+
+(define_insn "*sse3_addsubv2df3_1s" ; matches vec_select over (vec_concat difference sum) with the sum written as op2 + op1
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+	(vec_select:V2DF
+	  (vec_concat:V4DF
+	    (minus:V2DF ; first half of the concat (selector indices 0-1): op1 - op2
+	      (match_operand:V2DF 1 "register_operand" "0,x") ; alternative 0 ties op1 to the destination; alternative 1 allows a separate register
+	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
+	    (plus:V2DF (match_dup 2) (match_dup 1))) ; second half (selector indices 2-3): op2 + op1, operands in swapped order
+	  (parallel [(const_int 0) (const_int 3)])))] ; result lane 0 comes from the difference, lane 1 from the sum
+  "TARGET_SSE3"
+  "@
+   addsubpd\t{%2, %0|%0, %2}
+   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx") ; one entry per alternative: first template is the non-AVX form, second the AVX form
+   (set_attr "type" "sseadd")
+   (set_attr "atom_unit" "complex")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V2DF")])
+
 (define_insn "avx_addsubv8sf3"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
 	(vec_merge:V8SF
@@ -2064,6 +2134,42 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
+(define_insn "*avx_addsubv8sf3_1" ; matches vec_select over (vec_concat difference sum) and emits vaddsubps
+  [(set (match_operand:V8SF 0 "register_operand" "=x")
+	(vec_select:V8SF
+	  (vec_concat:V16SF
+	    (minus:V8SF ; first half of the concat (selector indices 0-7): op1 - op2
+	      (match_operand:V8SF 1 "register_operand" "x")
+	      (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+	    (plus:V8SF (match_dup 1) (match_dup 2))) ; second half (selector indices 8-15): op1 + op2
+	  (parallel [(const_int 0) (const_int 9)
+		     (const_int 2) (const_int 11)
+		     (const_int 4) (const_int 13)
+		     (const_int 6) (const_int 15)])))] ; even result lanes come from the difference, odd lanes from the sum
+  "TARGET_AVX"
+  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_addsubv8sf3_1s" ; matches vec_select over (vec_concat difference sum) with the sum written as op2 + op1
+  [(set (match_operand:V8SF 0 "register_operand" "=x")
+	(vec_select:V8SF
+	  (vec_concat:V16SF
+	    (minus:V8SF ; first half of the concat (selector indices 0-7): op1 - op2
+	      (match_operand:V8SF 1 "register_operand" "x")
+	      (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+	    (plus:V8SF (match_dup 2) (match_dup 1))) ; second half (selector indices 8-15): op2 + op1, operands in swapped order
+	  (parallel [(const_int 0) (const_int 9)
+		     (const_int 2) (const_int 11)
+		     (const_int 4) (const_int 13)
+		     (const_int 6) (const_int 15)])))] ; even result lanes come from the difference, odd lanes from the sum
+  "TARGET_AVX"
+  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V8SF")])
+
 (define_insn "sse3_addsubv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x")
 	(vec_merge:V4SF
@@ -2082,6 +2188,46 @@
    (set_attr "prefix_rep" "1,*")
    (set_attr "mode" "V4SF")])
 
+(define_insn "*sse3_addsubv4sf3_1" ; matches vec_select over (vec_concat difference sum) and emits addsubps / vaddsubps
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (minus:V4SF ; first half of the concat (selector indices 0-3): op1 - op2
+	      (match_operand:V4SF 1 "register_operand" "0,x") ; alternative 0 ties op1 to the destination; alternative 1 allows a separate register
+	      (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
+	    (plus:V4SF (match_dup 1) (match_dup 2))) ; second half (selector indices 4-7): op1 + op2
+	  (parallel [(const_int 0) (const_int 5)
+		     (const_int 2) (const_int 7)])))] ; result lanes 0,2 come from the difference, lanes 1,3 from the sum
+  "TARGET_SSE3"
+  "@
+   addsubps\t{%2, %0|%0, %2}
+   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx") ; one entry per alternative: first template is the non-AVX form, second the AVX form
+   (set_attr "type" "sseadd")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix_rep" "1,*")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "*sse3_addsubv4sf3_1s" ; matches vec_select over (vec_concat difference sum) with the sum written as op2 + op1
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+	(vec_select:V4SF
+	  (vec_concat:V8SF
+	    (minus:V4SF ; first half of the concat (selector indices 0-3): op1 - op2
+	      (match_operand:V4SF 1 "register_operand" "0,x") ; alternative 0 ties op1 to the destination; alternative 1 allows a separate register
+	      (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
+	    (plus:V4SF (match_dup 2) (match_dup 1))) ; second half (selector indices 4-7): op2 + op1, operands in swapped order
+	  (parallel [(const_int 0) (const_int 5)
+		     (const_int 2) (const_int 7)])))] ; result lanes 0,2 come from the difference, lanes 1,3 from the sum
+  "TARGET_SSE3"
+  "@
+   addsubps\t{%2, %0|%0, %2}
+   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx") ; one entry per alternative: first template is the non-AVX form, second the AVX form
+   (set_attr "type" "sseadd")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix_rep" "1,*")
+   (set_attr "mode" "V4SF")])
+
 (define_insn "avx_h<plusminus_insn>v4df3"
   [(set (match_operand:V4DF 0 "register_operand" "=x")
 	(vec_concat:V4DF
diff --git a/gcc/testsuite/gcc.target/i386/pr56766-1.c b/gcc/testsuite/gcc.target/i386/pr56766-1.c
new file mode 100644
index 00000000000..dff489f4f4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr56766-1.c
@@ -0,0 +1,42 @@
+/* PR target/56766 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+typedef float v4sf __attribute__((vector_size(16)));
+typedef int v4si __attribute__((vector_size(16)));
+v4sf foo_v4sf (v4sf x, v4sf y) /* 4 x float: interleave x - y and x + y via an explicit shuffle */
+{
+  v4sf tem0 = x - y; /* element-wise difference */
+  v4sf tem1 = x + y; /* element-wise sum */
+  return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 }); /* indices 0-3 pick from tem0, 4-7 from tem1: lanes 0,2 = difference, lanes 1,3 = sum */
+}
+
+typedef float v8sf __attribute__((vector_size(32)));
+typedef int v8si __attribute__((vector_size(32)));
+v8sf foo_v8sf (v8sf x, v8sf y) /* 8 x float: interleave x - y and x + y via an explicit shuffle */
+{
+  v8sf tem0 = x - y; /* element-wise difference */
+  v8sf tem1 = x + y; /* element-wise sum */
+  return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 }); /* indices 0-7 pick from tem0, 8-15 from tem1: even lanes = difference, odd lanes = sum */
+}
+
+typedef double v2df __attribute__((vector_size(16)));
+typedef long long v2di __attribute__((vector_size(16)));
+v2df foo_v2df (v2df x, v2df y) /* 2 x double: interleave x - y and x + y via an explicit shuffle */
+{
+  v2df tem0 = x - y; /* element-wise difference */
+  v2df tem1 = x + y; /* element-wise sum */
+  return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 }); /* indices 0-1 pick from tem0, 2-3 from tem1: lane 0 = difference, lane 1 = sum */
+}
+
+typedef double v4df __attribute__((vector_size(32)));
+typedef long long v4di __attribute__((vector_size(32)));
+v4df foo_v4df (v4df x, v4df y) /* 4 x double: interleave x - y and x + y via an explicit shuffle */
+{
+  v4df tem0 = x - y; /* element-wise difference */
+  v4df tem1 = x + y; /* element-wise sum */
+  return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 }); /* indices 0-3 pick from tem0, 4-7 from tem1: lanes 0,2 = difference, lanes 1,3 = sum */
+}
+
+/* { dg-final { scan-assembler-times "vaddsubps" 2 } } */
+/* { dg-final { scan-assembler-times "vaddsubpd" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr56766-2.c b/gcc/testsuite/gcc.target/i386/pr56766-2.c
new file mode 100644
index 00000000000..fa3a706caeb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr56766-2.c
@@ -0,0 +1,40 @@
+/* PR target/56766 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -mavx" } */
+
+void test_v4sf (float * __restrict__ p, float * __restrict q) /* updates p[0..3] in place from q[0..3] */
+{
+  p[0] = p[0] - q[0]; /* even index: subtract */
+  p[1] = p[1] + q[1]; /* odd index: add */
+  p[2] = p[2] - q[2];
+  p[3] = p[3] + q[3];
+}
+
+void test_v8sf (float * __restrict__ p, float * __restrict q) /* updates p[0..7] in place from q[0..7] */
+{
+  p[0] = p[0] - q[0]; /* even index: subtract */
+  p[1] = p[1] + q[1]; /* odd index: add */
+  p[2] = p[2] - q[2];
+  p[3] = p[3] + q[3];
+  p[4] = p[4] - q[4];
+  p[5] = p[5] + q[5];
+  p[6] = p[6] - q[6];
+  p[7] = p[7] + q[7];
+}
+
+void test_v2df (double * __restrict__ p, double * __restrict q) /* updates p[0..1] in place from q[0..1] */
+{
+  p[0] = p[0] - q[0]; /* even index: subtract */
+  p[1] = p[1] + q[1]; /* odd index: add */
+}
+
+void test_v4df (double * __restrict__ p, double * __restrict q) /* updates p[0..3] in place from q[0..3] */
+{
+  p[0] = p[0] - q[0]; /* even index: subtract */
+  p[1] = p[1] + q[1]; /* odd index: add */
+  p[2] = p[2] - q[2];
+  p[3] = p[3] + q[3];
+}
+
+/* { dg-final { scan-assembler-times "vaddsubps" 2 } } */
+/* { dg-final { scan-assembler-times "vaddsubpd" 2 } } */
author	uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>	2015-06-16 17:14:00 +0000
committer	uros <uros@138bc75d-0d04-0410-961f-82ee72b054a4>	2015-06-16 17:14:00 +0000
commit	6c197bf1ddfaea3872fe73d9007d7cc69f9d4cb4 (patch)
tree	ef1d3ef8ba885765f9a5ef18659215ef258098b7
parent	510e444f7bb40f9dca0f77235da6d06b568976d9 (diff)