aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>2011-11-07 16:00:08 +0000
committerjakub <jakub@138bc75d-0d04-0410-961f-82ee72b054a4>2011-11-07 16:00:08 +0000
commit6296bd9601caec8b0c01944c5a06eb1fcfd216c6 (patch)
treec76018ce8a3bf4d0b9e9c7b74c7f46fd636d2477
parent16dfb11221da8bb69bfcc5ca457cb6a6f145b03a (diff)
* config/i386/i386.c (ix86_expand_builtin): If gather mask
argument is known to have all high bits set, pass pc_rtx as second argument to the expander instead of op0. * config/i386/sse.md (*avx2_gathersi<mode>_2, *avx2_gatherdi<mode>_2): New patterns. * config/i386/avx2intrin.h (_mm256_i32gather_pd, _mm256_i64gather_pd, _mm256_i32gather_ps): Set mask using _mm256_cmp_pd with zero vector arguments and _CMP_EQ_OQ instead of _mm256_set1_pd. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@181090 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog10
-rw-r--r--gcc/config/i386/avx2intrin.h6
-rw-r--r--gcc/config/i386/i386.c65
-rw-r--r--gcc/config/i386/sse.md44
4 files changed, 122 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0d0db8a391d..0bfd95afc3d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,15 @@
2011-11-07 Jakub Jelinek <jakub@redhat.com>
+ * config/i386/i386.c (ix86_expand_builtin): If gather mask
+ argument is known to have all high bits set, pass pc_rtx as
+ second argument to the expander instead of op0.
+ * config/i386/sse.md (*avx2_gathersi<mode>_2,
+ *avx2_gatherdi<mode>_2): New patterns.
+ * config/i386/avx2intrin.h (_mm256_i32gather_pd,
+ _mm256_i64gather_pd, _mm256_i32gather_ps): Set mask using
+ _mm256_cmp_pd with zero vector arguments and _CMP_EQ_OQ instead of
+ _mm256_set1_pd.
+
PR tree-optimization/50789
* tree-vect-stmts.c (process_use): Add force argument, avoid
exist_non_indexing_operands_for_use_p check if true.
diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h
index 3c8f3600d68..12ed05fe029 100644
--- a/gcc/config/i386/avx2intrin.h
+++ b/gcc/config/i386/avx2intrin.h
@@ -1252,7 +1252,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_pd (double const *base, __m128i index, const int scale)
{
__v4df src = _mm256_setzero_pd ();
- __v4df mask = _mm256_set1_pd((double)(long long int) -1);
+ __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
return (__m256d) __builtin_ia32_gathersiv4df (src,
base,
@@ -1304,7 +1304,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_pd (double const *base, __m256i index, const int scale)
{
__v4df src = _mm256_setzero_pd ();
- __v4df mask = _mm256_set1_pd((double)(long long int) -1);
+ __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
return (__m256d) __builtin_ia32_gatherdiv4df (src,
base,
@@ -1356,7 +1356,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_ps (float const *base, __m256i index, const int scale)
{
__v8sf src = _mm256_setzero_ps ();
- __v8sf mask = _mm256_set1_ps((float)(int) -1);
+ __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
return (__m256) __builtin_ia32_gathersiv8sf (src,
base,
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 4d7d2cf1d4a..4461fbba82d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -29087,6 +29087,71 @@ rdrand_step:
error ("last argument must be scale 1, 2, 4, 8");
return const0_rtx;
}
+
+ /* Optimize. If mask is known to have all high bits set,
+ replace op0 with pc_rtx to signal that the instruction
+ overwrites the whole destination and doesn't use its
+ previous contents. */
+ if (optimize)
+ {
+ if (TREE_CODE (arg3) == VECTOR_CST)
+ {
+ tree elt;
+ unsigned int negative = 0;
+ for (elt = TREE_VECTOR_CST_ELTS (arg3);
+ elt; elt = TREE_CHAIN (elt))
+ {
+ tree cst = TREE_VALUE (elt);
+ if (TREE_CODE (cst) == INTEGER_CST
+ && tree_int_cst_sign_bit (cst))
+ negative++;
+ else if (TREE_CODE (cst) == REAL_CST
+ && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
+ negative++;
+ }
+ if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
+ op0 = pc_rtx;
+ }
+ else if (TREE_CODE (arg3) == SSA_NAME)
+ {
+ /* Recognize also when mask is like:
+ __v2df src = _mm_setzero_pd ();
+ __v2df mask = _mm_cmpeq_pd (src, src);
+ or
+ __v8sf src = _mm256_setzero_ps ();
+ __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
+ as that is a cheaper way to load all ones into
+ a register than having to load a constant from
+ memory. */
+ gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
+ if (is_gimple_call (def_stmt))
+ {
+ tree fndecl = gimple_call_fndecl (def_stmt);
+ if (fndecl
+ && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
+ switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
+ {
+ case IX86_BUILTIN_CMPPD:
+ case IX86_BUILTIN_CMPPS:
+ case IX86_BUILTIN_CMPPD256:
+ case IX86_BUILTIN_CMPPS256:
+ if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
+ break;
+ /* FALLTHRU */
+ case IX86_BUILTIN_CMPEQPD:
+ case IX86_BUILTIN_CMPEQPS:
+ if (initializer_zerop (gimple_call_arg (def_stmt, 0))
+ && initializer_zerop (gimple_call_arg (def_stmt,
+ 1)))
+ op0 = pc_rtx;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+
pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
if (! pat)
return const0_rtx;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e3de9ecdd24..688b5be9648 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -12567,6 +12567,26 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*avx2_gathersi<mode>_2"
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
+ (unspec:VEC_GATHER_MODE
+ [(pc)
+ (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 2 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (mem:BLK (scratch))
+ (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
+ "TARGET_AVX2"
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "avx2_gatherdi<mode>"
[(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
(unspec:VEC_GATHER_MODE
@@ -12608,3 +12628,27 @@
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx2_gatherdi<mode>_2"
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
+ (unspec:VEC_GATHER_MODE
+ [(pc)
+ (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 2 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (mem:BLK (scratch))
+ (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
+ "TARGET_AVX2"
+{
+ if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])