diff options
author | H.J. Lu <hongjiu.lu@intel.com> | 2008-08-11 18:48:38 +0000 |
---|---|---|
committer | H.J. Lu <hongjiu.lu@intel.com> | 2008-08-11 18:48:38 +0000 |
commit | ac59cc41c2a1b7e3422bd59efc48d7dbedd27660 (patch) | |
tree | cb66e4bbf2787a6ed756e6d70b6a3911d45ea1d1 | |
parent | 87e7a747270eede017d212c769320cd105ef9da7 (diff) |
2008-08-11 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (ix86_builtin_type): Add
V8SF_FTYPE_V8SF_V8SI, V4DF_FTYPE_V4DF_V4DI,
V4SF_FTYPE_V4SF_V4SI and V2DF_FTYPE_V2DF_V2DI. Replace
V8SF_FTYPE_V8SF_V8SF_V8SF_INT, V4DF_FTYPE_V4DF_V4DF_V4DF_INT,
V4SF_FTYPE_V4SF_V4SF_V4SF_INT and V2DF_FTYPE_V2DF_V2DF_V2DF_INT
with V8SF_FTYPE_V8SF_V8SF_V8SI_INT, V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
V4SF_FTYPE_V4SF_V4SF_V4SI_INT and V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
respectively.
(bdesc_args): Correct the type of control parameter for
__builtin_ia32_vpermilvarpd, __builtin_ia32_vpermilvarps,
__builtin_ia32_vpermilvarpd256, __builtin_ia32_vpermilvarps256,
__builtin_ia32_vpermil2pd, __builtin_ia32_vpermil2ps,
__builtin_ia32_vpermil2pd256 and __builtin_ia32_vpermil2ps256.
(ix86_init_mmx_sse_builtins): Replace
v8sf_ftype_v8sf_v8sf_v8sf_int, v4df_ftype_v4df_v4df_v4df_int,
v4sf_ftype_v4sf_v4sf_v4sf_int and v2df_ftype_v2df_v2df_v2df_int
with v8sf_ftype_v8sf_v8sf_v8si_int, v4df_ftype_v4df_v4df_v4di_int,
v4sf_ftype_v4sf_v4sf_v4si_int and v2df_ftype_v2df_v2df_v2di_int,
respectively. Add v8sf_ftype_v8sf_v8si, v4df_ftype_v4df_v4di,
v4sf_ftype_v4sf_v4si and v2df_ftype_v2df_v2di. Handle
V8SF_FTYPE_V8SF_V8SI, V4DF_FTYPE_V4DF_V4DI,
V4SF_FTYPE_V4SF_V4SI and V2DF_FTYPE_V2DF_V2DI.
(ix86_expand_args_builtin): Handle V8SF_FTYPE_V8SF_V8SI,
V4DF_FTYPE_V4DF_V4DI, V4SF_FTYPE_V4SF_V4SI and
V2DF_FTYPE_V2DF_V2DI. Replace V8SF_FTYPE_V8SF_V8SF_V8SF_INT,
V4DF_FTYPE_V4DF_V4DF_V4DF_INT, V4SF_FTYPE_V4SF_V4SF_V4SF_INT and
V2DF_FTYPE_V2DF_V2DF_V2DF_INT with V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
V4DF_FTYPE_V4DF_V4DF_V4DI_INT, V4SF_FTYPE_V4SF_V4SF_V4SI_INT and
V2DF_FTYPE_V2DF_V2DF_V2DI_INT, respectively.
* config/i386/gmmintrin.h (_mm_permutevar_pd): Correct the
type of control parameter.
(_mm256_permutevar_pd): Likewise.
(_mm_permutevar_ps): Likewise.
(_mm256_permutevar_ps): Likewise.
(_mm_permute2_pd): Likewise.
(_mm256_permute2_pd): Likewise.
(_mm_permute2_ps): Likewise.
(_mm256_permute2_ps): Likewise.
* config/i386/sse.md (avxpermvecmode): New.
(avx_vpermilvar<mode>3): Use <avxpermvecmode>.
(avx_vpermil2<mode>3): Likewise.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/ix86/avx@138959 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog.avx | 46 | ||||
-rw-r--r-- | gcc/config/i386/gmmintrin.h | 40 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 98 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 6 |
4 files changed, 136 insertions, 54 deletions
diff --git a/gcc/ChangeLog.avx b/gcc/ChangeLog.avx index 70d1bd12467..845b7911fde 100644 --- a/gcc/ChangeLog.avx +++ b/gcc/ChangeLog.avx @@ -1,3 +1,49 @@ +2008-08-11 H.J. Lu <hongjiu.lu@intel.com> + + * config/i386/i386.c (ix86_builtin_type): Add + V8SF_FTYPE_V8SF_V8SI, V4DF_FTYPE_V4DF_V4DI, + V4SF_FTYPE_V4SF_V4SI and V2DF_FTYPE_V2DF_V2DI. Replace + V8SF_FTYPE_V8SF_V8SF_V8SF_INT, V4DF_FTYPE_V4DF_V4DF_V4DF_INT, + V4SF_FTYPE_V4SF_V4SF_V4SF_INT and V2DF_FTYPE_V2DF_V2DF_V2DF_INT + with V8SF_FTYPE_V8SF_V8SF_V8SI_INT, V4DF_FTYPE_V4DF_V4DF_V4DI_INT, + V4SF_FTYPE_V4SF_V4SF_V4SI_INT and V2DF_FTYPE_V2DF_V2DF_V2DI_INT, + respectively. + (bdesc_args): Correct the type of control parameter for + __builtin_ia32_vpermilvarpd, __builtin_ia32_vpermilvarps, + __builtin_ia32_vpermilvarpd256, __builtin_ia32_vpermilvarps256, + __builtin_ia32_vpermil2pd, __builtin_ia32_vpermil2ps, + __builtin_ia32_vpermil2pd256 and __builtin_ia32_vpermil2ps256. + (ix86_init_mmx_sse_builtins): Replace + v8sf_ftype_v8sf_v8sf_v8sf_int, v4df_ftype_v4df_v4df_v4df_int, + v4sf_ftype_v4sf_v4sf_v4sf_int and v2df_ftype_v2df_v2df_v2df_int + with v8sf_ftype_v8sf_v8sf_v8si_int, v4df_ftype_v4df_v4df_v4di_int, + v4sf_ftype_v4sf_v4sf_v4si_int and v2df_ftype_v2df_v2df_v2di_int, + respectively. Add v8sf_ftype_v8sf_v8si, v4df_ftype_v4df_v4di, + v4sf_ftype_v4sf_v4si and v2df_ftype_v2df_v2di. Handle + V8SF_FTYPE_V8SF_V8SI, V4DF_FTYPE_V4DF_V4DI, + V4SF_FTYPE_V4SF_V4SI and V2DF_FTYPE_V2DF_V2DI. + (ix86_expand_args_builtin): Handle V8SF_FTYPE_V8SF_V8SI, + V4DF_FTYPE_V4DF_V4DI, V4SF_FTYPE_V4SF_V4SI and + V2DF_FTYPE_V2DF_V2DI. Replace V8SF_FTYPE_V8SF_V8SF_V8SF_INT, + V4DF_FTYPE_V4DF_V4DF_V4DF_INT, V4SF_FTYPE_V4SF_V4SF_V4SF_INT and + V2DF_FTYPE_V2DF_V2DF_V2DF_INT with V8SF_FTYPE_V8SF_V8SF_V8SI_INT, + V4DF_FTYPE_V4DF_V4DF_V4DI_INT, V4SF_FTYPE_V4SF_V4SF_V4SI_INT and + V2DF_FTYPE_V2DF_V2DF_V2DI_INT, respectively. + + * config/i386/gmmintrin.h (_mm_permutevar_pd): Correct the + type of control parameter. + (_mm256_permutevar_pd): Likewise. + (_mm_permutevar_ps): Likewise. + (_mm256_permutevar_ps): Likewise. + (_mm_permute2_pd): Likewise. + (_mm256_permute2_pd): Likewise. + (_mm_permute2_ps): Likewise. + (_mm256_permute2_ps): Likewise. + + * config/i386/sse.md (avxpermvecmode): New. + (avx_vpermilvar<mode>3): Use <avxpermvecmode>. + (avx_vpermil2<mode>3): Likewise. + 2008-08-06 H.J. Lu <hongjiu.lu@intel.com> * config/i386/i386.c (ix86_expand_special_args_builtin): Replace diff --git a/gcc/config/i386/gmmintrin.h b/gcc/config/i386/gmmintrin.h index 505b8605257..1c6bb18be83 100644 --- a/gcc/config/i386/gmmintrin.h +++ b/gcc/config/i386/gmmintrin.h @@ -582,31 +582,31 @@ _mm256_zeroupper (void) } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_permutevar_pd (__m128d __A, __m128d __B) +_mm_permutevar_pd (__m128d __A, __m128i __C) { return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A, - (__v2df)__B); + (__v2di)__C); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_permutevar_pd (__m256d __A, __m256d __B) +_mm256_permutevar_pd (__m256d __A, __m256i __C) { return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A, - (__v4df)__B); + (__v4di)__C); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_permutevar_ps (__m128 __A, __m128 __B) +_mm_permutevar_ps (__m128 __A, __m128i __C) { return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A, - (__v4sf)__B); + (__v4si)__C); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_permutevar_ps (__m256 __A, __m256 __B) +_mm256_permutevar_ps (__m256 __A, __m256i __C) { return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A, - (__v8sf)__B); + (__v8si)__C); } #ifdef __OPTIMIZE__ @@ -635,38 +635,38 @@ _mm256_permute_ps (__m256 __X, const int __C) } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_permute2_pd (__m128d __X, __m128d __Y, __m128d __C, const int __I) +_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I) { return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X, (__v2df)__Y, - (__v2df)__C, + (__v2di)__C, __I); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256d __C, const int __I) +_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I) { return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X, (__v4df)__Y, - (__v4df)__C, + (__v4di)__C, __I); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_permute2_ps (__m128 __X, __m128 __Y, __m128 __C, const int __I) +_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I) { return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X, (__v4sf)__Y, - (__v4sf)__C, + (__v4si)__C, __I); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256 __C, const int __I) +_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I) { return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X, (__v8sf)__Y, - (__v8sf)__C, + (__v8si)__C, __I); } #else @@ -685,25 +685,25 @@ _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256 __C, const int __I) #define _mm_permute2_pd(X, Y, C, I) \ ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \ (__v2df)(__m128d)(Y), \ - (__v2df)(__m128d)(C), \ + (__v2di)(__m128d)(C), \ (int)(I))) #define _mm256_permute2_pd(X, Y, C, I) \ ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \ (__v4df)(__m256d)(Y), \ - (__v4df)(__m256d)(C), \ + (__v4di)(__m256d)(C), \ (int)(I))) #define _mm_permute2_ps(X, Y, C, I) \ ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \ (__v4sf)(__m128)(Y), \ - (__v4sf)(__m128)(C), \ + (__v4si)(__m128)(C), \ (int)(I))) #define _mm256_permute2_ps(X, Y, C, I) \ ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \ (__v8sf)(__m256)(Y), \ - (__v8sf)(__m256)(C), \ + (__v8si)(__m256)(C), \ (int)(I))) #endif diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 61c51226b8e..2bb2246f5d5 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19957,6 +19957,7 @@ enum ix86_builtin_type V8HI_FTYPE_V4SI_V4SI, V8HI_FTYPE_V8HI_SI_COUNT, V8SF_FTYPE_V8SF_V8SF, + V8SF_FTYPE_V8SF_V8SI, V4SI_FTYPE_V4SI_V4SI, V4SI_FTYPE_V4SI_V4SI_COUNT, V4SI_FTYPE_V8HI_V8HI, @@ -19969,8 +19970,10 @@ enum ix86_builtin_type V4HI_FTYPE_V2SI_V2SI, V4HI_FTYPE_V4HI_SI_COUNT, V4DF_FTYPE_V4DF_V4DF, + V4DF_FTYPE_V4DF_V4DI, V4SF_FTYPE_V4SF_V4SF, V4SF_FTYPE_V4SF_V4SF_SWAP, + V4SF_FTYPE_V4SF_V4SI, V4SF_FTYPE_V4SF_V2SI, V4SF_FTYPE_V4SF_V2DF, V4SF_FTYPE_V4SF_DI, @@ -19990,6 +19993,7 @@ enum ix86_builtin_type V2DF_FTYPE_V2DF_V2DF, V2DF_FTYPE_V2DF_V2DF_SWAP, V2DF_FTYPE_V2DF_V4SF, + V2DF_FTYPE_V2DF_V2DI, V2DF_FTYPE_V2DF_DI, V2DF_FTYPE_V2DF_SI, V2SF_FTYPE_V2SF_V2SF, @@ -20033,10 +20037,10 @@ enum ix86_builtin_type V2DI2TI_FTYPE_V2DI_V2DI_INT, V1DI2DI_FTYPE_V1DI_V1DI_INT, V2DF_FTYPE_V2DF_V2DF_INT, - V8SF_FTYPE_V8SF_V8SF_V8SF_INT, - V4DF_FTYPE_V4DF_V4DF_V4DF_INT, - V4SF_FTYPE_V4SF_V4SF_V4SF_INT, - V2DF_FTYPE_V2DF_V2DF_V2DF_INT, + V8SF_FTYPE_V8SF_V8SF_V8SI_INT, + V4DF_FTYPE_V4DF_V4DF_V4DI_INT, + V4SF_FTYPE_V4SF_V4SF_V4SI_INT, + V2DF_FTYPE_V2DF_V2DF_V2DI_INT, V2DI_FTYPE_V2DI_UINT_UINT, V2DI_FTYPE_V2DI_V2DI_UINT_UINT }; @@ -20628,10 +20632,10 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, @@ -20664,10 +20668,10 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_INT }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT }, @@ -21609,25 +21613,25 @@ ix86_init_mmx_sse_builtins (void) V4DF_type_node, V4DF_type_node, integer_type_node, NULL_TREE); - tree v8sf_ftype_v8sf_v8sf_v8sf_int + tree v8sf_ftype_v8sf_v8sf_v8si_int = build_function_type_list (V8SF_type_node, V8SF_type_node, V8SF_type_node, - V8SF_type_node, integer_type_node, + V8SI_type_node, integer_type_node, NULL_TREE); - tree v4df_ftype_v4df_v4df_v4df_int + tree v4df_ftype_v4df_v4df_v4di_int = build_function_type_list (V4DF_type_node, V4DF_type_node, V4DF_type_node, - V4DF_type_node, integer_type_node, + V4DI_type_node, integer_type_node, NULL_TREE); - tree v4sf_ftype_v4sf_v4sf_v4sf_int + tree v4sf_ftype_v4sf_v4sf_v4si_int = build_function_type_list (V4SF_type_node, V4SF_type_node, V4SF_type_node, - V4SF_type_node, integer_type_node, + V4SI_type_node, integer_type_node, NULL_TREE); - tree v2df_ftype_v2df_v2df_v2df_int + tree v2df_ftype_v2df_v2df_v2di_int = build_function_type_list (V2DF_type_node, V2DF_type_node, V2DF_type_node, - V2DF_type_node, integer_type_node, + V2DI_type_node, integer_type_node, NULL_TREE); tree v8sf_ftype_pcfloat = build_function_type_list (V8SF_type_node, @@ -21768,6 +21772,20 @@ ix86_init_mmx_sse_builtins (void) = build_function_type_list (integer_type_node, V4DF_type_node, V4DF_type_node, NULL_TREE); + tree v8sf_ftype_v8sf_v8si + = build_function_type_list (V8SF_type_node, + V8SF_type_node, V8SI_type_node, + NULL_TREE); + tree v4df_ftype_v4df_v4di + = build_function_type_list (V4DF_type_node, + V4DF_type_node, V4DI_type_node, + NULL_TREE); + tree v4sf_ftype_v4sf_v4si + = build_function_type_list (V4SF_type_node, + V4SF_type_node, V4SI_type_node, NULL_TREE); + tree v2df_ftype_v2df_v2di + = build_function_type_list (V2DF_type_node, + V2DF_type_node, V2DI_type_node, NULL_TREE); tree ftype; @@ -22090,6 +22108,9 @@ ix86_init_mmx_sse_builtins (void) case V8SF_FTYPE_V8SF_V8SF: type = v8sf_ftype_v8sf_v8sf; break; + case V8SF_FTYPE_V8SF_V8SI: + type = v8sf_ftype_v8sf_v8si; + break; case V4SI_FTYPE_V4SI_V4SI: case V4SI_FTYPE_V4SI_V4SI_COUNT: type = v4si_ftype_v4si_v4si; @@ -22122,10 +22143,16 @@ ix86_init_mmx_sse_builtins (void) case V4DF_FTYPE_V4DF_V4DF: type = v4df_ftype_v4df_v4df; break; + case V4DF_FTYPE_V4DF_V4DI: + type = v4df_ftype_v4df_v4di; + break; case V4SF_FTYPE_V4SF_V4SF: case V4SF_FTYPE_V4SF_V4SF_SWAP: type = v4sf_ftype_v4sf_v4sf; break; + case V4SF_FTYPE_V4SF_V4SI: + type = v4sf_ftype_v4sf_v4si; + break; case V4SF_FTYPE_V4SF_V2SI: type = v4sf_ftype_v4sf_v2si; break; @@ -22177,6 +22204,9 @@ ix86_init_mmx_sse_builtins (void) case V2DF_FTYPE_V2DF_V4SF: type = v2df_ftype_v2df_v4sf; break; + case V2DF_FTYPE_V2DF_V2DI: + type = v2df_ftype_v2df_v2di; + break; case V2DF_FTYPE_V2DF_DI: type = v2df_ftype_v2df_int64; break; @@ -22306,17 +22336,17 @@ ix86_init_mmx_sse_builtins (void) case V1DI2DI_FTYPE_V1DI_V1DI_INT: type = v1di_ftype_v1di_v1di_int; break; - case V8SF_FTYPE_V8SF_V8SF_V8SF_INT: - type = v8sf_ftype_v8sf_v8sf_v8sf_int; + case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: + type = v8sf_ftype_v8sf_v8sf_v8si_int; break; - case V4DF_FTYPE_V4DF_V4DF_V4DF_INT: - type = v4df_ftype_v4df_v4df_v4df_int; + case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: + type = v4df_ftype_v4df_v4df_v4di_int; break; - case V4SF_FTYPE_V4SF_V4SF_V4SF_INT: - type = v4sf_ftype_v4sf_v4sf_v4sf_int; + case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: + type = v4sf_ftype_v4sf_v4sf_v4si_int; break; - case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: - type = v2df_ftype_v2df_v2df_v2df_int; + case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: + type = v2df_ftype_v2df_v2df_v2di_int; break; default: gcc_unreachable (); @@ -23360,6 +23390,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V8HI_FTYPE_V16QI_V16QI: case V8HI_FTYPE_V4SI_V4SI: case V8SF_FTYPE_V8SF_V8SF: + case V8SF_FTYPE_V8SF_V8SI: case V4SI_FTYPE_V4SI_V4SI: case V4SI_FTYPE_V8HI_V8HI: case V4SI_FTYPE_V4SF_V4SF: @@ -23368,7 +23399,9 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V4HI_FTYPE_V8QI_V8QI: case V4HI_FTYPE_V2SI_V2SI: case V4DF_FTYPE_V4DF_V4DF: + case V4DF_FTYPE_V4DF_V4DI: case V4SF_FTYPE_V4SF_V4SF: + case V4SF_FTYPE_V4SF_V4SI: case V4SF_FTYPE_V4SF_V2SI: case V4SF_FTYPE_V4SF_V2DF: case V4SF_FTYPE_V4SF_DI: @@ -23383,6 +23416,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V2SI_FTYPE_V2SF_V2SF: case V2DF_FTYPE_V2DF_V2DF: case V2DF_FTYPE_V2DF_V4SF: + case V2DF_FTYPE_V2DF_V2DI: case V2DF_FTYPE_V2DF_DI: case V2DF_FTYPE_V2DF_SI: case V2SF_FTYPE_V2SF_V2SF: @@ -23475,10 +23509,10 @@ ix86_expand_args_builtin (const struct builtin_description *d, nargs = 3; nargs_constant = 2; break; - case V8SF_FTYPE_V8SF_V8SF_V8SF_INT: - case V4DF_FTYPE_V4DF_V4DF_V4DF_INT: - case V4SF_FTYPE_V4SF_V4SF_V4SF_INT: - case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: + case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: + case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: + case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: + case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: nargs = 4; nargs_constant = 1; break; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 600f595ff52..d1ec9ea829d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -107,6 +107,8 @@ (V8SF "SF") (V4DF "DF")]) (define_mode_attr avxcvtvecmode [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")]) +(define_mode_attr avxpermvecmode + [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")]) (define_mode_attr avxmodesuffixf2c [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")]) (define_mode_attr avxmodesuffixp @@ -11526,7 +11528,7 @@ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") (unspec:AVXMODEF2P [(match_operand:AVXMODEF2P 1 "register_operand" "x") - (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")] + (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")] UNSPEC_VPERMIL))] "TARGET_AVX" "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" @@ -11539,7 +11541,7 @@ (unspec:AVXMODEF2P [(match_operand:AVXMODEF2P 1 "register_operand" "x,x") (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "x,xm") - (match_operand:AVXMODEF2P 3 "nonimmediate_operand" "xm,x") + (match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm,x") (match_operand:SI 4 "const_0_to_3_operand" "n,n")] UNSPEC_VPERMIL2))] "TARGET_AVX" |