about summary refs log tree commit diff
diff options
context:
space:
mode:
author H.J. Lu <hongjiu.lu@intel.com> 2008-08-11 18:48:38 +0000
committer H.J. Lu <hongjiu.lu@intel.com> 2008-08-11 18:48:38 +0000
commit ac59cc41c2a1b7e3422bd59efc48d7dbedd27660 (patch)
tree cb66e4bbf2787a6ed756e6d70b6a3911d45ea1d1
parent 87e7a747270eede017d212c769320cd105ef9da7 (diff)
2008-08-11 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (ix86_builtin_type): Add V8SF_FTYPE_V8SF_V8SI, V4DF_FTYPE_V4DF_V4DI, V4SF_FTYPE_V4SF_V4SI and V2DF_FTYPE_V2DF_V2DI. Replace V8SF_FTYPE_V8SF_V8SF_V8SF_INT, V4DF_FTYPE_V4DF_V4DF_V4DF_INT, V4SF_FTYPE_V4SF_V4SF_V4SF_INT and V2DF_FTYPE_V2DF_V2DF_V2DF_INT with V8SF_FTYPE_V8SF_V8SF_V8SI_INT, V4DF_FTYPE_V4DF_V4DF_V4DI_INT, V4SF_FTYPE_V4SF_V4SF_V4SI_INT and V2DF_FTYPE_V2DF_V2DF_V2DI_INT, respectively. (bdesc_args): Correct the type of control parameter for __builtin_ia32_vpermilvarpd, __builtin_ia32_vpermilvarps, __builtin_ia32_vpermilvarpd256, __builtin_ia32_vpermilvarps256, __builtin_ia32_vpermil2pd, __builtin_ia32_vpermil2ps, __builtin_ia32_vpermil2pd256 and __builtin_ia32_vpermil2ps256. (ix86_init_mmx_sse_builtins): Replace v8sf_ftype_v8sf_v8sf_v8sf_int, v4df_ftype_v4df_v4df_v4df_int, v4sf_ftype_v4sf_v4sf_v4sf_int and v2df_ftype_v2df_v2df_v2df_int with v8sf_ftype_v8sf_v8sf_v8si_int, v4df_ftype_v4df_v4df_v4di_int, v4sf_ftype_v4sf_v4sf_v4si_int and v2df_ftype_v2df_v2df_v2di_int, respectively. Add v8sf_ftype_v8sf_v8si, v4df_ftype_v4df_v4di, v4sf_ftype_v4sf_v4si and v2df_ftype_v2df_v2di. Handle V8SF_FTYPE_V8SF_V8SI, V4DF_FTYPE_V4DF_V4DI, V4SF_FTYPE_V4SF_V4SI and V2DF_FTYPE_V2DF_V2DI. (ix86_expand_args_builtin): Handle V8SF_FTYPE_V8SF_V8SI, V4DF_FTYPE_V4DF_V4DI, V4SF_FTYPE_V4SF_V4SI and V2DF_FTYPE_V2DF_V2DI. Replace V8SF_FTYPE_V8SF_V8SF_V8SF_INT, V4DF_FTYPE_V4DF_V4DF_V4DF_INT, V4SF_FTYPE_V4SF_V4SF_V4SF_INT and V2DF_FTYPE_V2DF_V2DF_V2DF_INT with V8SF_FTYPE_V8SF_V8SF_V8SI_INT, V4DF_FTYPE_V4DF_V4DF_V4DI_INT, V4SF_FTYPE_V4SF_V4SF_V4SI_INT and V2DF_FTYPE_V2DF_V2DF_V2DI_INT, respectively. * config/i386/gmmintrin.h (_mm_permutevar_pd): Correct the type of control parameter. (_mm256_permutevar_pd): Likewise. (_mm_permutevar_ps): Likewise. (_mm256_permutevar_ps): Likewise. (_mm_permute2_pd): Likewise. (_mm256_permute2_pd): Likewise. (_mm_permute2_ps): Likewise. (_mm256_permute2_ps): Likewise. * config/i386/sse.md (avxpermvecmode): New. 
(avx_vpermilvar<mode>3): Use <avxpermvecmode>. (avx_vpermil2<mode>3): Likewise. git-svn-id: https://gcc.gnu.org/svn/gcc/branches/ix86/avx@138959 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog.avx 46
-rw-r--r-- gcc/config/i386/gmmintrin.h 40
-rw-r--r-- gcc/config/i386/i386.c 98
-rw-r--r-- gcc/config/i386/sse.md 6
4 files changed, 136 insertions, 54 deletions
diff --git a/gcc/ChangeLog.avx b/gcc/ChangeLog.avx
index 70d1bd12467..845b7911fde 100644
--- a/gcc/ChangeLog.avx
+++ b/gcc/ChangeLog.avx
@@ -1,3 +1,49 @@
+2008-08-11 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386.c (ix86_builtin_type): Add
+ V8SF_FTYPE_V8SF_V8SI, V4DF_FTYPE_V4DF_V4DI,
+ V4SF_FTYPE_V4SF_V4SI and V2DF_FTYPE_V2DF_V2DI. Replace
+ V8SF_FTYPE_V8SF_V8SF_V8SF_INT, V4DF_FTYPE_V4DF_V4DF_V4DF_INT,
+ V4SF_FTYPE_V4SF_V4SF_V4SF_INT and V2DF_FTYPE_V2DF_V2DF_V2DF_INT
+ with V8SF_FTYPE_V8SF_V8SF_V8SI_INT, V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
+ V4SF_FTYPE_V4SF_V4SF_V4SI_INT and V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
+ respectively.
+ (bdesc_args): Correct the type of control parameter for
+ __builtin_ia32_vpermilvarpd, __builtin_ia32_vpermilvarps,
+ __builtin_ia32_vpermilvarpd256, __builtin_ia32_vpermilvarps256,
+ __builtin_ia32_vpermil2pd, __builtin_ia32_vpermil2ps,
+ __builtin_ia32_vpermil2pd256 and __builtin_ia32_vpermil2ps256.
+ (ix86_init_mmx_sse_builtins): Replace
+ v8sf_ftype_v8sf_v8sf_v8sf_int, v4df_ftype_v4df_v4df_v4df_int,
+ v4sf_ftype_v4sf_v4sf_v4sf_int and v2df_ftype_v2df_v2df_v2df_int
+ with v8sf_ftype_v8sf_v8sf_v8si_int, v4df_ftype_v4df_v4df_v4di_int,
+ v4sf_ftype_v4sf_v4sf_v4si_int and v2df_ftype_v2df_v2df_v2di_int,
+ respectively. Add v8sf_ftype_v8sf_v8si, v4df_ftype_v4df_v4di,
+ v4sf_ftype_v4sf_v4si and v2df_ftype_v2df_v2di. Handle
+ V8SF_FTYPE_V8SF_V8SI, V4DF_FTYPE_V4DF_V4DI,
+ V4SF_FTYPE_V4SF_V4SI and V2DF_FTYPE_V2DF_V2DI.
+ (ix86_expand_args_builtin): Handle V8SF_FTYPE_V8SF_V8SI,
+ V4DF_FTYPE_V4DF_V4DI, V4SF_FTYPE_V4SF_V4SI and
+ V2DF_FTYPE_V2DF_V2DI. Replace V8SF_FTYPE_V8SF_V8SF_V8SF_INT,
+ V4DF_FTYPE_V4DF_V4DF_V4DF_INT, V4SF_FTYPE_V4SF_V4SF_V4SF_INT and
+ V2DF_FTYPE_V2DF_V2DF_V2DF_INT with V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
+ V4DF_FTYPE_V4DF_V4DF_V4DI_INT, V4SF_FTYPE_V4SF_V4SF_V4SI_INT and
+ V2DF_FTYPE_V2DF_V2DF_V2DI_INT, respectively.
+
+ * config/i386/gmmintrin.h (_mm_permutevar_pd): Correct the
+ type of control parameter.
+ (_mm256_permutevar_pd): Likewise.
+ (_mm_permutevar_ps): Likewise.
+ (_mm256_permutevar_ps): Likewise.
+ (_mm_permute2_pd): Likewise.
+ (_mm256_permute2_pd): Likewise.
+ (_mm_permute2_ps): Likewise.
+ (_mm256_permute2_ps): Likewise.
+
+ * config/i386/sse.md (avxpermvecmode): New.
+ (avx_vpermilvar<mode>3): Use <avxpermvecmode>.
+ (avx_vpermil2<mode>3): Likewise.
+
2008-08-06 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (ix86_expand_special_args_builtin): Replace
diff --git a/gcc/config/i386/gmmintrin.h b/gcc/config/i386/gmmintrin.h
index 505b8605257..1c6bb18be83 100644
--- a/gcc/config/i386/gmmintrin.h
+++ b/gcc/config/i386/gmmintrin.h
@@ -582,31 +582,31 @@ _mm256_zeroupper (void)
}
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutevar_pd (__m128d __A, __m128d __B)
+_mm_permutevar_pd (__m128d __A, __m128i __C)
{
return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A,
- (__v2df)__B);
+ (__v2di)__C);
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutevar_pd (__m256d __A, __m256d __B)
+_mm256_permutevar_pd (__m256d __A, __m256i __C)
{
return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A,
- (__v4df)__B);
+ (__v4di)__C);
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permutevar_ps (__m128 __A, __m128 __B)
+_mm_permutevar_ps (__m128 __A, __m128i __C)
{
return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A,
- (__v4sf)__B);
+ (__v4si)__C);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutevar_ps (__m256 __A, __m256 __B)
+_mm256_permutevar_ps (__m256 __A, __m256i __C)
{
return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A,
- (__v8sf)__B);
+ (__v8si)__C);
}
#ifdef __OPTIMIZE__
@@ -635,38 +635,38 @@ _mm256_permute_ps (__m256 __X, const int __C)
}
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permute2_pd (__m128d __X, __m128d __Y, __m128d __C, const int __I)
+_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
{
return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
(__v2df)__Y,
- (__v2df)__C,
+ (__v2di)__C,
__I);
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256d __C, const int __I)
+_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
{
return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
(__v4df)__Y,
- (__v4df)__C,
+ (__v4di)__C,
__I);
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_permute2_ps (__m128 __X, __m128 __Y, __m128 __C, const int __I)
+_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
{
return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
(__v4sf)__Y,
- (__v4sf)__C,
+ (__v4si)__C,
__I);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256 __C, const int __I)
+_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
{
return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
(__v8sf)__Y,
- (__v8sf)__C,
+ (__v8si)__C,
__I);
}
#else
@@ -685,25 +685,25 @@ _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256 __C, const int __I)
#define _mm_permute2_pd(X, Y, C, I) \
((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), \
- (__v2df)(__m128d)(C), \
+ (__v2di)(__m128d)(C), \
(int)(I)))
#define _mm256_permute2_pd(X, Y, C, I) \
((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \
(__v4df)(__m256d)(Y), \
- (__v4df)(__m256d)(C), \
+ (__v4di)(__m256d)(C), \
(int)(I)))
#define _mm_permute2_ps(X, Y, C, I) \
((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), \
- (__v4sf)(__m128)(C), \
+ (__v4si)(__m128)(C), \
(int)(I)))
#define _mm256_permute2_ps(X, Y, C, I) \
((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \
(__v8sf)(__m256)(Y), \
- (__v8sf)(__m256)(C), \
+ (__v8si)(__m256)(C), \
(int)(I)))
#endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 61c51226b8e..2bb2246f5d5 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19957,6 +19957,7 @@ enum ix86_builtin_type
V8HI_FTYPE_V4SI_V4SI,
V8HI_FTYPE_V8HI_SI_COUNT,
V8SF_FTYPE_V8SF_V8SF,
+ V8SF_FTYPE_V8SF_V8SI,
V4SI_FTYPE_V4SI_V4SI,
V4SI_FTYPE_V4SI_V4SI_COUNT,
V4SI_FTYPE_V8HI_V8HI,
@@ -19969,8 +19970,10 @@ enum ix86_builtin_type
V4HI_FTYPE_V2SI_V2SI,
V4HI_FTYPE_V4HI_SI_COUNT,
V4DF_FTYPE_V4DF_V4DF,
+ V4DF_FTYPE_V4DF_V4DI,
V4SF_FTYPE_V4SF_V4SF,
V4SF_FTYPE_V4SF_V4SF_SWAP,
+ V4SF_FTYPE_V4SF_V4SI,
V4SF_FTYPE_V4SF_V2SI,
V4SF_FTYPE_V4SF_V2DF,
V4SF_FTYPE_V4SF_DI,
@@ -19990,6 +19993,7 @@ enum ix86_builtin_type
V2DF_FTYPE_V2DF_V2DF,
V2DF_FTYPE_V2DF_V2DF_SWAP,
V2DF_FTYPE_V2DF_V4SF,
+ V2DF_FTYPE_V2DF_V2DI,
V2DF_FTYPE_V2DF_DI,
V2DF_FTYPE_V2DF_SI,
V2SF_FTYPE_V2SF_V2SF,
@@ -20033,10 +20037,10 @@ enum ix86_builtin_type
V2DI2TI_FTYPE_V2DI_V2DI_INT,
V1DI2DI_FTYPE_V1DI_V1DI_INT,
V2DF_FTYPE_V2DF_V2DF_INT,
- V8SF_FTYPE_V8SF_V8SF_V8SF_INT,
- V4DF_FTYPE_V4DF_V4DF_V4DF_INT,
- V4SF_FTYPE_V4SF_V4SF_V4SF_INT,
- V2DF_FTYPE_V2DF_V2DF_V2DF_INT,
+ V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
+ V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
+ V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
+ V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
V2DI_FTYPE_V2DI_UINT_UINT,
V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
@@ -20628,10 +20632,10 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
@@ -20664,10 +20668,10 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
@@ -21609,25 +21613,25 @@ ix86_init_mmx_sse_builtins (void)
V4DF_type_node, V4DF_type_node,
integer_type_node,
NULL_TREE);
- tree v8sf_ftype_v8sf_v8sf_v8sf_int
+ tree v8sf_ftype_v8sf_v8sf_v8si_int
= build_function_type_list (V8SF_type_node,
V8SF_type_node, V8SF_type_node,
- V8SF_type_node, integer_type_node,
+ V8SI_type_node, integer_type_node,
NULL_TREE);
- tree v4df_ftype_v4df_v4df_v4df_int
+ tree v4df_ftype_v4df_v4df_v4di_int
= build_function_type_list (V4DF_type_node,
V4DF_type_node, V4DF_type_node,
- V4DF_type_node, integer_type_node,
+ V4DI_type_node, integer_type_node,
NULL_TREE);
- tree v4sf_ftype_v4sf_v4sf_v4sf_int
+ tree v4sf_ftype_v4sf_v4sf_v4si_int
= build_function_type_list (V4SF_type_node,
V4SF_type_node, V4SF_type_node,
- V4SF_type_node, integer_type_node,
+ V4SI_type_node, integer_type_node,
NULL_TREE);
- tree v2df_ftype_v2df_v2df_v2df_int
+ tree v2df_ftype_v2df_v2df_v2di_int
= build_function_type_list (V2DF_type_node,
V2DF_type_node, V2DF_type_node,
- V2DF_type_node, integer_type_node,
+ V2DI_type_node, integer_type_node,
NULL_TREE);
tree v8sf_ftype_pcfloat
= build_function_type_list (V8SF_type_node,
@@ -21768,6 +21772,20 @@ ix86_init_mmx_sse_builtins (void)
= build_function_type_list (integer_type_node,
V4DF_type_node, V4DF_type_node,
NULL_TREE);
+ tree v8sf_ftype_v8sf_v8si
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SI_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4di
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DI_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v4si
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SI_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2di
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DI_type_node, NULL_TREE);
tree ftype;
@@ -22090,6 +22108,9 @@ ix86_init_mmx_sse_builtins (void)
case V8SF_FTYPE_V8SF_V8SF:
type = v8sf_ftype_v8sf_v8sf;
break;
+ case V8SF_FTYPE_V8SF_V8SI:
+ type = v8sf_ftype_v8sf_v8si;
+ break;
case V4SI_FTYPE_V4SI_V4SI:
case V4SI_FTYPE_V4SI_V4SI_COUNT:
type = v4si_ftype_v4si_v4si;
@@ -22122,10 +22143,16 @@ ix86_init_mmx_sse_builtins (void)
case V4DF_FTYPE_V4DF_V4DF:
type = v4df_ftype_v4df_v4df;
break;
+ case V4DF_FTYPE_V4DF_V4DI:
+ type = v4df_ftype_v4df_v4di;
+ break;
case V4SF_FTYPE_V4SF_V4SF:
case V4SF_FTYPE_V4SF_V4SF_SWAP:
type = v4sf_ftype_v4sf_v4sf;
break;
+ case V4SF_FTYPE_V4SF_V4SI:
+ type = v4sf_ftype_v4sf_v4si;
+ break;
case V4SF_FTYPE_V4SF_V2SI:
type = v4sf_ftype_v4sf_v2si;
break;
@@ -22177,6 +22204,9 @@ ix86_init_mmx_sse_builtins (void)
case V2DF_FTYPE_V2DF_V4SF:
type = v2df_ftype_v2df_v4sf;
break;
+ case V2DF_FTYPE_V2DF_V2DI:
+ type = v2df_ftype_v2df_v2di;
+ break;
case V2DF_FTYPE_V2DF_DI:
type = v2df_ftype_v2df_int64;
break;
@@ -22306,17 +22336,17 @@ ix86_init_mmx_sse_builtins (void)
case V1DI2DI_FTYPE_V1DI_V1DI_INT:
type = v1di_ftype_v1di_v1di_int;
break;
- case V8SF_FTYPE_V8SF_V8SF_V8SF_INT:
- type = v8sf_ftype_v8sf_v8sf_v8sf_int;
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ type = v8sf_ftype_v8sf_v8sf_v8si_int;
break;
- case V4DF_FTYPE_V4DF_V4DF_V4DF_INT:
- type = v4df_ftype_v4df_v4df_v4df_int;
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
+ type = v4df_ftype_v4df_v4df_v4di_int;
break;
- case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
- type = v4sf_ftype_v4sf_v4sf_v4sf_int;
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
+ type = v4sf_ftype_v4sf_v4sf_v4si_int;
break;
- case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
- type = v2df_ftype_v2df_v2df_v2df_int;
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
+ type = v2df_ftype_v2df_v2df_v2di_int;
break;
default:
gcc_unreachable ();
@@ -23360,6 +23390,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8HI_FTYPE_V16QI_V16QI:
case V8HI_FTYPE_V4SI_V4SI:
case V8SF_FTYPE_V8SF_V8SF:
+ case V8SF_FTYPE_V8SF_V8SI:
case V4SI_FTYPE_V4SI_V4SI:
case V4SI_FTYPE_V8HI_V8HI:
case V4SI_FTYPE_V4SF_V4SF:
@@ -23368,7 +23399,9 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V4HI_FTYPE_V8QI_V8QI:
case V4HI_FTYPE_V2SI_V2SI:
case V4DF_FTYPE_V4DF_V4DF:
+ case V4DF_FTYPE_V4DF_V4DI:
case V4SF_FTYPE_V4SF_V4SF:
+ case V4SF_FTYPE_V4SF_V4SI:
case V4SF_FTYPE_V4SF_V2SI:
case V4SF_FTYPE_V4SF_V2DF:
case V4SF_FTYPE_V4SF_DI:
@@ -23383,6 +23416,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2SI_FTYPE_V2SF_V2SF:
case V2DF_FTYPE_V2DF_V2DF:
case V2DF_FTYPE_V2DF_V4SF:
+ case V2DF_FTYPE_V2DF_V2DI:
case V2DF_FTYPE_V2DF_DI:
case V2DF_FTYPE_V2DF_SI:
case V2SF_FTYPE_V2SF_V2SF:
@@ -23475,10 +23509,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 3;
nargs_constant = 2;
break;
- case V8SF_FTYPE_V8SF_V8SF_V8SF_INT:
- case V4DF_FTYPE_V4DF_V4DF_V4DF_INT:
- case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
- case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
nargs = 4;
nargs_constant = 1;
break;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 600f595ff52..d1ec9ea829d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -107,6 +107,8 @@
(V8SF "SF") (V4DF "DF")])
(define_mode_attr avxcvtvecmode
[(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
+(define_mode_attr avxpermvecmode
+ [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
(define_mode_attr avxmodesuffixf2c
[(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
(define_mode_attr avxmodesuffixp
@@ -11526,7 +11528,7 @@
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "register_operand" "x")
- (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
+ (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
UNSPEC_VPERMIL))]
"TARGET_AVX"
"vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
@@ -11539,7 +11541,7 @@
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "register_operand" "x,x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "x,xm")
- (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x")
+ (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm,x")
(match_operand:SI 4 "const_0_to_3_operand" "n,n")]
UNSPEC_VPERMIL2))]
"TARGET_AVX"